metaseqr2 <- function(
counts,
sampleList,
excludeList=NULL,
fileType=c("auto","sam","bam","bed"),
path=NULL,
contrast=NULL,
libsizeList=NULL,
embedCols=list(
idCol=4,
gcCol=NA,
nameCol=NA,
btCol=NA
),
annotation=NULL,
org=c("hg18","hg19","hg38","mm9","mm10","rn5","rn6","dm3","dm6",
"danrer7","pantro4","susscr3","tair10","equcab2"),
refdb=c("ensembl","ucsc","refseq"),
version="auto",
transLevel=c("gene","transcript","exon"),
countType=c("gene","exon","utr"),
utrOpts=list(
frac=1,
minLength=300,
downstream=50
),
exonFilters=list(
minActiveExons=list(
exonsPerGene=5,
minExons=2,
frac=1/5
)
),
geneFilters=list(
length=list(
length=500
),
avgReads=list(
averagePerBp=100,
quantile=0.25
),
expression=list(
median=TRUE,
mean=FALSE,
quantile=NA,
known=NA,
custom=NA
),
biotype=getDefaults("biotypeFilter",org[1]),
presence=list(
frac=0.25,
minCount=10,
perCondition=FALSE
)
),
whenApplyFilter=c("postnorm","prenorm"),
normalization=c("deseq","deseq2","edaseq","edger","noiseq","nbpseq",
"absseq","dss","each","none"),
normArgs=NULL,
#statistics=c("deseq","deseq2","edger","noiseq","bayseq","limma","nbpseq",
# "absseq","dss"),
statistics=c("deseq","deseq2","edger","noiseq","limma","nbpseq","absseq",
"dss"),
statArgs=NULL,
adjustMethod=sort(c(p.adjust.methods,"qvalue")),
metaP=if (length(statistics)>1) c("simes","bonferroni","fisher",
"dperm_min","dperm_max","dperm_weight","fperm","whitlock","minp","maxp",
"weight","pandora","none") else "none",
weight=rep(1/length(statistics),length(statistics)),
nperm=10000,
pcut=NA,
logOffset=1,
pOffset=NULL,
preset=NULL, # An analysis strictness preset
qcPlots=c(
"mds","biodetection","countsbio","saturation","readnoise","filtered",
"correl","pairwise","boxplot","gcbias","lengthbias","meandiff",
"meanvar","rnacomp","deheatmap","volcano","biodist","mastat","statvenn",
"foldvenn","deregulogram"
),
figFormat=c("png","jpg","tiff","bmp","pdf","ps"),
outList=FALSE,
exportWhere=NA, # An output directory for the project
exportWhat=c("annotation","p_value","adj_p_value","meta_p_value",
"adj_meta_p_value","fold_change","stats","counts","flags"),
exportScale=c("natural","log2","log10","vst","rpgm"),
exportValues=c("raw","normalized"),
exportStats=c("mean","median","sd","mad","cv","rcv"),
exportCountsTable=FALSE,
restrictCores=0.6,
report=TRUE,
reportTop=0.1,
reportTemplate="default",
saveGeneModel=TRUE,
verbose=TRUE,
runLog=TRUE,
reportDb=c("dexie","sqlite"),
localDb=file.path(system.file(package="metaseqR2"),"annotation.sqlite"),
offlineReport=TRUE,
createTracks=FALSE,
overwriteTracks=FALSE,
trackExportPath=file.path(exportWhere,"tracks"),
trackInfo=list(
stranded=FALSE,
normTo=1e+9,
urlBase="http://www.trackserver.me",
fasta=NULL,
gtf=NULL,
hubInfo=list(
name="MyHub",
shortLabel="My hub",
longLabel="My hub long",
email="someone@example.com"
)
),
.progressFun=NULL,
.exportR2C=FALSE,
...
) {
# Save function call for report
FUN_CALL <- deparse(sys.call())
# Check for older argument names and adjust
args <- as.list(match.call())
backCheckedArgs <- .backwardsConvertArgs(args)
if (backCheckedArgs$mixDetected)
stop("You must provide either old metaseqR arguments (e.g sample.list)",
" or new metaseqR2 arguments (e.g. sampleList)! Not both!")
if (backCheckedArgs$backDetected) {
argMap <- .backwardsMapOld2New()
# Check what happens with the new argument embedCols
if (!is.null(backCheckedArgs$args$embedCols)) {
embedCols <- backCheckedArgs$args$embedCols
backCheckedArgs$args$embedCols <- NULL
}
for (n in names(backCheckedArgs$args))
assign(argMap[[n]],eval(backCheckedArgs$args[[n]]))
# There are some nested arguments that also need correction
if (!is.null(backCheckedArgs$args$gene.filters)) {
if (!is.null(geneFilters$avg.reads)) {
geneFilters$avgReads <- geneFilters$avg.reads
if (!is.null(geneFilters$avg.reads$average.per.bp)) {
geneFilters$avgReads$averagePerBp <-
geneFilters$avg.reads$average.per.bp
geneFilters$avg.reads$average.per.bp <- NULL
}
geneFilters$avg.reads <- NULL
}
if (!is.null(geneFilters$presence)) {
if (!is.null(geneFilters$presence$min.count)) {
geneFilters$presence$minCount <-
geneFilters$presence$min.count
geneFilters$presence$min.count <- NULL
}
if (!is.null(geneFilters$presence$per.condition)) {
geneFilters$presence$perCondition <-
geneFilters$presence$per.condition
geneFilters$presence$per.condition <- NULL
}
}
}
if (!is.null(backCheckedArgs$args$exon.filters)) {
if (!is.null(exonFilters$min.active.exons)) {
exonFilters$minActiveExons <- exonFilters$min.active.exons
if (!is.null(exonFilters$min.active.exons$exons.per.gene)) {
exonFilters$minActiveExons$exonsPerGene <-
exonFilters$min.active.exons$exons.per.gene
exonFilters$min.active.exons$exons.per.gene <- NULL
}
if (!is.null(exonFilters$min.active.exons$min.exons)) {
exonFilters$minActiveExons$minExons <-
exonFilters$min.active.exons$min.exons
exonFilters$min.active.exons$min.exons <- NULL
}
exonFilters$min.active.exons <- NULL
}
}
if ("venn" %in% qcPlots)
qcPlots[which(qcPlots == "venn")] <- "statvenn"
if (annotation == "download")
annotation <- NULL
}
else
# Check if there are any mispelled or invalid parameters and throw a
# warning
checkMainArgs(args)
# Check essential arguments
fromRaw <- fromPrevious <- FALSE
if (missing(counts) && (missing(sampleList) || is.list(sampleList)))
stop("You must provide a file with genomic region (gene, exon, etc.) ",
"counts\nor an input targets file to create input from! If the ",
"counts file is\nmissing, sampleList cannot be missing or it must ",
"be a targets file\nwith at least three columns! See the ",
"readTargets function.\ncounts may also be a gene model list ",
"(see the documentation)")
sampleListPrev <- NULL
if (!missing(counts) && !missing(sampleList) && is.character(counts)
&& file.exists(counts) && length(grep(".RData$",counts))>0) {
warning("When restoring a previous analysis, if the sampleList ",
"argument is provided, it precedes the stored one. Also, the ",
"excludeList argument is ingored...",immediate.=TRUE)
fromPrevious <- TRUE
disp("Restoring previous analysis from ",basename(counts))
tmpEnv <- .backwardsCompatibility(counts)
sampleListPrev <- tmpEnv$sampleList
excludeList <- NULL
if (is.character(sampleList) && file.exists(sampleList)) {
theList <- readTargets(sampleList,path=path)
sampleList <- theList$samples
}
countType <- tmpEnv$countType
}
if (!missing(counts) && missing(sampleList) && is.character(counts)
&& file.exists(counts) && length(grep(".RData$",counts))>0) {
# Time to load previous analysis if existing
fromPrevious <- TRUE
message("Restoring previous analysis from ",basename(counts))
tmpEnv <- .backwardsCompatibility(counts)
sampleList <- tmpEnv$sampleList
countType <- tmpEnv$countType
}
if (missing(sampleList) && !fromPrevious || (!is.list(sampleList) &&
!file.exists(sampleList)))
stop("You must provide a list with condition names and sample names ",
"(same as in the counts file) or an input file to create the ",
"sample list from!")
if (!missing(sampleList) && !is.list(sampleList)
&& file.exists(sampleList) && !missing(counts) && !fromPrevious)
sampleList <- makeSampleList(sampleList)
if (!missing(sampleList) && !is.list(sampleList)
&& file.exists(sampleList) && missing(counts)) {
counts <- NULL
theList <- readTargets(sampleList,path=path)
sampleList <- theList$samples
fileList <- theList$files
if (tolower(fileType[1])=="auto")
fileType <- theList$type
if (is.null(fileType))
stop(paste("The type of the input files could not be recognized!",
"Please specify (BAM or BED)..."))
fromRaw <- TRUE
}
else
theList <- NULL
# If report requested with SQLite RSQLite must be present,also pander
if (report) {
if (!requireNamespace("pander"))
stop("R package pander is required to build metaseqR2 reports!")
if ("sqlite" %in% reportDb && !requireNamespace("RSQLite"))
stop("R package RSQLite is required to build metaseqR reports!")
}
# Initialize environmental variables
HOME <- system.file(package="metaseqR2")
TEMPLATE <- HOME
# Globalize the project's verbosity and logger
if (fromRaw)
PROJECT_PATH <- makeProjectPath(exportWhere)
else
PROJECT_PATH <- makeProjectPath(exportWhere,counts)
assign("VERBOSE",verbose,envir=metaEnv)
if (runLog) {
if (file.exists(file.path(PROJECT_PATH$logs,"metaseqr_run.log")))
unlink(file.path(PROJECT_PATH$logs,"metaseqr_run.log"))
logger <- create.logger(logfile=file.path(PROJECT_PATH$logs,
"metaseqr_run.log"),level=2,logformat="%d %c %m")
}
else
logger <- NULL
assign("LOGGER",logger,envir=metaEnv)
# Check if sample names match in file/df and list, otherwise meaningless to
# proceed
if (!fromRaw && !fromPrevious) {
if (!is.data.frame(counts) && !is.list(counts)) {
if (file.exists(counts)) {
aline <- read.delim(counts,nrows=5) # Read the 1st lines
aline <- colnames(aline)
}
else
stopwrap("The counts file you provided does not exist!")
}
else if (is.data.frame(counts))
aline <- colnames(counts)
else if (is.list(counts))
aline <- names(counts)
samples <- unlist(sampleList,use.names=FALSE)
if (length(which(!is.na(match(samples,aline)))) != length(samples))
stopwrap("The sample names provided in the counts file/list do ",
"not match with those of the sampleList!")
}
# If exclude list given, check that it's a subset of sampleList, otherwise
# just ignore excludeList
if (!is.null(excludeList) && !is.na(excludeList)) {
sl <- unlist(sampleList)
el <- unlist(excludeList)
if (length(intersect(sl,el)) != length(el)) {
warnwrap("Some samples in excludeList do not match those in the ",
"initial sampleList! Ignoring...",now=TRUE)
excludeList <- NULL
}
}
fileType <- tolower(fileType[1])
org <- tolower(org[1])
refdb <- tolower(refdb[1])
version <- version[1]
transLevel <- tolower(transLevel[1])
countType <- tolower(countType[1])
whenApplyFilter <- tolower(whenApplyFilter[1])
normalization <- tolower(normalization[1])
adjustMethod <- adjustMethod[1]
metaP <- tolower(metaP[1])
statistics <- tolower(statistics)
figFormat <- tolower(figFormat)
if (!is.null(qcPlots))
qcPlots <- tolower(qcPlots)
exportWhat <- tolower(exportWhat)
exportScale <- tolower(exportScale)
exportValues <- tolower(exportValues)
exportStats <- tolower(exportStats)
reportDb <- tolower(reportDb[1])
if (!is.null(preset))
preset <- tolower(preset[1])
if (fromRaw)
countsName <- "imported sam/bam/bed files"
else {
if (!is.data.frame(counts) && !is.null(counts) && !is.list(counts)) {
checkFileArgs("counts",counts)
if (fromPrevious)
countsName <- "previously stored project"
else
countsName <- basename(counts)
}
else if (is.list(counts) && !is.data.frame(counts)) {
countsName <- "previously stored gene model"
}
else {
countsName <- "imported custom data frame"
}
}
# annotation cannot be "embedded" when countType is other than "gene" or
# transLevel is other than "gene"
if (!is.null(annotation) && !is.list(annotation)
&& is.character(annotation) && annotation == "embedded"
&& countType != "gene")
stopwrap("The annotation argument cannot be \"embedded\" when the ",
"countType argument is other than \"gene\"!")
# annotation must be a list to be fed to buildCustomAnnotation
if (is.list(annotation)) {
# members are checked by buildCustomAnnotation if required
# We only need to check that the gtfFile exists here
if (!("gtf" %in% names(annotation)))
stopwrap("A gtf field must be provided with an existing GTF file ",
"when providing a list with custom annotation elements!")
if ("gtf" %in% names(annotation) && is.character(annotation$gtf)
&& !file.exists(annotation$gtf))
stopwrap("An existing GTF file must be provided when providing a ",
"list with custom annotation elements!")
}
if (is.list(counts) && !is.data.frame(counts)
&& (countType=="exon" || countType=="utr")
&& annotation=="embedded") {
warnwrap("annotation cannot be \"embedded\" when importing a stored ",
"gene model! Setting to NULL...")
#annotation <- "download"
annotation <- NULL
}
if (metaP %in% c("weight","pandora","dperm_weight") &&
abs(1-sum(weight))>1e-5)
stopwrap("The weights given for p-value combination should sum to 1!")
checkTextArgs("fileType",fileType,c("auto","sam","bam","bed"),
multiarg=FALSE)
if (!is.null(annotation) && !is.list(annotation)
&& !file.exists(annotation))
checkTextArgs("annotation",annotation,"embedded",multiarg=FALSE)
if (is.character(localDb) && file.exists(localDb)) {
if (!.userOrg(org,localDb) && is.null(annotation))
checkTextArgs("org",org,c("hg18","hg19","hg38","mm9","mm10","rn5",
"rn6","dm3","dm6","danrer7","pantro4","susscr3","tair10",
"equcab2"),multiarg=FALSE)
}
else if (is.null(annotation)
&& !(is.character(localDb) || file.exists(localDb))) {
# So only some annotations can be fetched on-the-fly
checkTextArgs("org",org,c("hg18","hg19","hg38","mm9","mm10","rn5","rn6",
"dm3","dm6","danrer7","pantro4","susscr3","tair10","equcab2"),
multiarg=FALSE)
}
if (is.character(localDb) && file.exists(localDb)) {
if (!.userRefdb(refdb,localDb) && is.null(annotation))
checkTextArgs("refdb",refdb,c("ensembl","ucsc","refseq"),
multiarg=FALSE)
}
else if (is.null(annotation)
&& !(is.character(localDb) || file.exists(localDb))) {
checkTextArgs("refdb",refdb,c("ensembl","ucsc","refseq"),multiarg=FALSE)
}
checkTextArgs("transLevel",transLevel,c("gene","transcript","exon"),
multiarg=FALSE)
checkTextArgs("countType",countType,c("gene","exon","utr"),
multiarg=FALSE)
checkTextArgs("whenApplyFilter",whenApplyFilter,c("postnorm",
"prenorm"),multiarg=FALSE)
checkTextArgs("normalization",normalization,c("edaseq","deseq","deseq2",
"edger","noiseq","nbpseq","absseq","dss","each","none"),multiarg=FALSE)
if (!any(is.na(statistics)))
#checkTextArgs("statistics",statistics,c("deseq","deseq2","edger",
# "noiseq","bayseq","limma","nbpseq","absseq","dss"),multiarg=TRUE)
checkTextArgs("statistics",statistics,c("deseq","deseq2","edger",
"noiseq","limma","nbpseq","absseq","dss"),multiarg=TRUE)
checkTextArgs("metaP",metaP,c("simes","bonferroni","fisher","dperm_min",
"dperm_max","dperm_weight","fperm","whitlock","minp","maxp","weight",
"pandora","none"),multiarg=FALSE)
checkTextArgs("figFormat",figFormat,c("png","jpg","tiff","bmp","pdf",
"ps"),multiarg=TRUE)
checkTextArgs("exportWhat",exportWhat,c("annotation","p_value",
"adj_p_value","meta_p_value","adj_meta_p_value","fold_change","stats",
"counts","flags"),multiarg=TRUE)
checkTextArgs("exportScale",exportScale,c("natural","log2","log10",
"rpgm","vst"),multiarg=TRUE)
checkTextArgs("exportValues",exportValues,c("raw","normalized"),
multiarg=TRUE)
checkTextArgs("exportStats",exportStats,c("mean","median","sd","mad",
"cv","rcv"),multiarg=TRUE)
checkTextArgs("reportDb",reportDb,c("sqlite","dexie"))
if (!is.null(preset))
checkTextArgs("preset",preset,c("all_basic","all_normal","all_full",
"medium_basic","medium_normal","medium_full","strict_basic",
"strict_normal","strict_full"),multiarg=FALSE)
if (!is.null(qcPlots))
checkTextArgs("qcPlots",qcPlots,c("mds","biodetection","countsbio",
"saturation","readnoise","correl","pairwise","boxplot","gcbias",
"lengthbias","meandiff","meanvar","rnacomp","deheatmap","volcano",
"biodist","filtered","mastat","statvenn","foldvenn","deregulogram"),
multiarg=TRUE)
if (!is.na(restrictCores)) checkNumArgs("restrictCores",restrictCores,
"numeric",c(0,1),"botheq")
if (!is.na(pcut))
checkNumArgs("pcut",pcut,"numeric",c(0,1),"botheq")
if (!is.null(embedCols$gcCol) && !is.na(embedCols$gcCol))
checkNumArgs("embedCols$gcCol",embedCols$gcCol,"numeric",0,"gt")
if (!is.null(embedCols$nameCol) && !is.na(embedCols$nameCol))
checkNumArgs("embedCols$nameCol",embedCols$nameCol,"numeric",0,"gt")
if (!is.null(embedCols$btCol) && !is.na(embedCols$btCol))
checkNumArgs("embedCols$btCol",embedCols$btCol,"numeric",0,"gt")
if (!is.na(logOffset))
checkNumArgs("logOffset",logOffset,"numeric",0,"gt")
if (!is.null(pOffset))
checkNumArgs("pOffset",pOffset,"numeric",c(0,1),"botheq")
checkNumArgs("nperm",nperm,"numeric",10,"gt")
if (!is.null(reportTop))
checkNumArgs("reportTop",reportTop,"numeric",c(0,1),"both")
if (!is.null(contrast)) {
checkContrastFormat(contrast,sampleList)
contrast <- unique(contrast)
}
#if ("bayseq" %in% statistics)
# libsizeList <- checkLibsize(libsizeList,sampleList)
# Check the genomic database version argument
if (is.character(version)) {
version <- tolower(version)
checkTextArgs("version",version,c("auto"),multiarg=FALSE)
}
else
checkNumArgs("version",version,"numeric")
# Check utrOpts if countType is "utr"
if (countType == "utr") {
utrOptsDef <- getDefaults("utrOpts")
if (!is.null(utrOpts$frac)) {
checkNumArgs("utrOpts$frac",utrOpts$frac,"numeric",c(0,1),"both")
utrOptsDef$frac <- utrOpts$frac
}
if (!is.null(utrOpts$minLength)) {
checkNumArgs("utrOpts$minLength",utrOpts$minLength,"numeric",0,
"gt")
utrOptsDef$minLength <- utrOpts$minLength
}
if (!is.null(utrOpts$downstream)) {
checkNumArgs("utrOpts$downstream",utrOpts$downstream,"numeric",0,
"gte")
utrOptsDef$downstream <- utrOpts$downstream
}
utrOpts <- utrOptsDef
}
# Check main functionality packages
checkPackages(metaP,qcPlots)
# Check the case of embedded annotation, not given gc and gene name columns
# Checks about countType have been performed before
if (!is.null(annotation) && annotation == "embedded") {
if (is.na(embedCols$gcCol) && countType=="gene"
&& normalization=="edaseq")
stopwrap("The column that contains the gene GC content ",
"(\"embedCols$gcCol\") argument is required when ",
"\"annotation\" is \"embedded\"!")
if (is.na(embedCols$nameCol) && !is.na(geneFilters$expression$known)) {
warnwrap("The column that contains the HUGO gene symbols ",
"(\"embedCols$nameCol\") is missing with embedded annotation! ",
"Gene name expression filter will not be available...")
geneFilters$expression$known=NA
if ("volcano" %in% qcPlots)
warnwrap("The column that contains the HUGO gene symbols ",
"(\"embedCols$nameCol\") is missing with embedded ",
"annotation! Interactive volcano plots will not contain ",
"gene names...")
}
if (is.na(embedCols$btCol) && countType=="gene") {
warnwrap("The column that contains the gene biotypes ",
"(\"embedCols$btCol\") is missing with embedded annotation! ",
"Biotype filters and certain plots will not be available...")
geneFilters$biotype=NULL
toRemove <- match(c("biodetection","countsbio","saturation",
"biodist","filtered","rnacomp","readnoise"),qcPlots)
noMatch <- which(is.na(toRemove))
if (length(noMatch)>0)
toRemove <- toRemove[-noMatch]
if (length(toRemove)>0)
qcPlots <- qcPlots[-toRemove]
}
}
if (org %in% c("hg18","hg38","mm10") && (refdb %in% c("ucsc","refseq"))) {
warnwrap("Gene/exon biotypes cannot be retrieved when organism is ",
"\"hg18\", \"hg38\", \"mm10\" and annotation database is ",
"\"ucsc\" or \"refseq\"!\nBiotype filters and certain plots will ",
"not be available...")
geneFilters$biotype=NULL
toRemove <- match(c("biodetection","countsbio","saturation",
"biodist","filtered"),qcPlots)
noMatch <- which(is.na(toRemove))
if (length(noMatch)>0)
toRemove <- toRemove[-noMatch]
if (length(toRemove)>0)
qcPlots <- qcPlots[-toRemove]
}
if (transLevel %in% c("transcript","exon")) {
warnwrap("When transLevel is \"transcript\" or \"exon\", the GC bias ",
"plots are not available.")
if ("gcbias" %in% qcPlots)
qcPlots <- qcPlots[-which(qcPlots == "gcbias")]
}
# In case someone wants a simple exon analysis but has not understood well
# the docs of transLevel and countType...
if (transLevel == "exon" && countType == "exon") {
warnwrap("You have chosen transLevel=\"exon\" and countType=\"exon\".",
"\nYou are probably trying to perform simple differential exon ",
"analysis. This can be perofmed\nby setting transLevel=\"exon\" ",
"and countType=\"gene\". Setting for you...")
countType <- "gene"
}
# Remove biodist and volcano if no statistics or contrast
if ("biodist" %in% qcPlots && (any(is.na(statistics))
|| is.null(contrast))) {
warnwrap("The creation of a biodist diagram requires statistical ",
"testing! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "biodist")]
}
if ("volcano" %in% qcPlots && (any(is.na(statistics))
|| is.null(contrast))) {
warnwrap("The creation of a volcano plot requires statistical ",
"testing! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "volcano")]
}
if ("deheatmap" %in% qcPlots && (any(is.na(statistics))
|| is.null(contrast))) {
warnwrap("The creation of a deheatmap plot requires statistical ",
"testing! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "deheatmap")]
}
if ("statvenn" %in% qcPlots && (any(is.na(statistics))
|| is.null(contrast))) {
warnwrap("The creation of a statvenn plot requires statistical ",
"testing! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "statvenn")]
}
if ("foldvenn" %in% qcPlots && (any(is.na(statistics))
|| is.null(contrast))) {
warnwrap("The creation of a foldvenn plot requires statistical ",
"testing! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "foldvenn")]
}
if ("deregulogram" %in% qcPlots && (any(is.na(statistics))
|| is.null(contrast))) {
warnwrap("The creation of a deregulogram plot requires statistical ",
"testing! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "deregulogram")]
}
# mastat only if there is a contrast, otherwise nothing to plot
if ("mastat" %in% qcPlots && is.null(contrast)) {
warnwrap("The creation of a mastat plot requires the definition of a ",
"contrast! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "mastat")]
}
# Check if drawing a Venn diagram among tests is possible
if ("statvenn" %in% qcPlots && length(statistics)==1
&& !any(is.na(statistics))) {
warnwrap("The creation of a Venn diagram among different statistical ",
"tests is possible only when more than\none statistical ",
"algorithms are used! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "statvenn")]
}
# Check if drawing a Venn diagram among contrasts is possible
if ("foldvenn" %in% qcPlots && !is.null(contrast) && length(contrast)==1) {
warnwrap("The creation of a Venn diagram among different statistical ",
"comparisons is possible only when more than\none contrast ",
"defined! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "foldvenn")]
}
# Check if drawing a deregulogram for contrast pairs
if ("deregulogram" %in% qcPlots && !is.null(contrast)
&& length(contrast)==1) {
warnwrap("The creation of a deregulogram for pairwise statistical ",
"comparisons is possible only when more than\none contrast ",
"defined! Removing from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "deregulogram")]
}
# Remove meandiff if there are no replicates
if ("meandiff" %in% qcPlots & any(lengths(sampleList) < 2)) {
warnwrap("The creation of a meandiff plot is not possible for single ",
"replicate conditions!\nRemoving from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "meandiff")]
}
# Remove rnacomp if there are more than 10 samples
if ("rnacomp" %in% qcPlots & sum(lengths(sampleList)) > 12) {
warnwrap("The creation of an rnacomp plot is not possible for more ",
"than 12 samples!\nRemoving from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "rnacomp")]
}
# Check if we have single-replicate conditions and presence$perCondition is
# enabled
if (any(lengths(sampleList) < 2) & !is.null(geneFilters$presence)) {
if (geneFilters$presence$perCondition) {
warnwrap("geneFilters$presence$perCondition cannot be TRUE when ",
"there are single-replicate conditions are present\nas this ",
"will cause sample drops! Setting to FALSE...")
geneFilters$presence$perCondition <- FALSE
}
}
# Check if tracks asked but we are in Windows...
if (createTracks && .Platform$OS.type == "windows") {
warnwrap("Signal tracks asked but Windows OS detected! Track ",
"generation is not possible in Windows. Ignoring...")
createTracks = FALSE
}
# Check if tracks asked but not starting with a targets file
if (createTracks && is.null(theList)) {
warnwrap("Signal tracks asked but no targets file with BAM file paths ",
"provided! Ignoring...")
createTracks <- FALSE
}
# Check if tracks asked but organism is not directly supported
if (createTracks && !(org %in% getSupportedOrganisms()))
warnwrap("Organism ",org," is not in the list of directly supported ",
"organisms! If the pipeline fails, please use the\n",
"createSignalTracks function directly.")
# Check additional input arguments for normalization and statistics
algArgs <- validateAlgArgs(normalization,statistics,normArgs,statArgs)
normArgs <- algArgs$normArgs
statArgs <- algArgs$statArgs
# Override settings if a preset is given
if (!is.null(preset)) {
presetOpts <- getPresetOpts(preset,org)
exonFilters <- presetOpts$exonFilters
geneFilters <- presetOpts$geneFilters
pcut <- presetOpts$pcut
exportWhat <- presetOpts$exportWhat
exportScale <- presetOpts$exportScale
exportValues <- presetOpts$exportValues
exportStats <- presetOpts$exportStats
}
if (report) {
reportMessages <- makeReportMessages("en")
if (!is.null(qcPlots) && !("png" %in% figFormat)) {
warnwrap("png format is required in order to build a report! ",
"Adding to figure output formats...")
figFormat <- c(figFormat,"png")
}
}
# Check for replicates. DESeq2 cannot perform without them and ABSSeq cannot
# perform complex model analysis if not provided with replicates.
if ("deseq2" %in% statistics && any(lengths(sampleList) < 2)) {
stopwrap("DESeq2 does not support analysis without ",
"replication. Please choose another algorithm or ",
"provide replicates.")
}
if (!is.null(contrast) && "absseq" %in% statistics) {
contrastListRepCheck <- makeContrastList(contrast,sampleList)
for (conName in names(contrastListRepCheck)) {
con <- contrastListRepCheck[[conName]]
cons <- unique(unlist(con))
# Checking if we have more than 2 conditions to compare
# simultaneously
if (length(cons)>2) {
if (any(lengths(sampleList) < 2))
# Checking if there are no replicates
stopwrap("ABSSeq cannot perform complex design",
"analysis without replicates.")
}
}
}
# Display initialization report
TB <- Sys.time()
disp(strftime(Sys.time()),": Data processing started...\n")
############################################################################
if ("dss" %in% statistics && any(lengths(sampleList) < 2)) {
message=c(
"\n=================================================\n",
"DSS will be run without replicates.\n",
"Results must be interpreted with caution.\n",
"===================================================\n"
)
disp(message)
}
disp("Read counts file: ",countsName)
disp("Conditions: ",paste(names(sampleList),collapse=", "))
disp("Samples to include: ",paste(unlist(sampleList),collapse=", "))
if (!is.null(excludeList) && !is.na(excludeList))
disp("Samples to exclude: ",paste(unlist(excludeList),collapse=", "))
else
disp("Samples to exclude: none")
if (!is.null(contrast))
disp("Requested contrasts: ",paste(contrast,collapse=", "))
else
disp("Requested contrasts: none")
if (!is.null(libsizeList)) {
disp("Library sizes: ")
for (n in names(libsizeList))
disp(" ",paste(n,libsizeList[[n]],sep=": "))
}
if (!is.null(annotation) && !is.list(annotation))
disp("Annotation: ",annotation)
if (is.list(annotation))
disp("Annotation: user provided GTF file")
disp("Organism: ",org)
disp("Reference source: ",refdb)
disp("Count type: ",countType)
if (countType == "utr") {
disp("3' UTR fraction: ",utrOpts$frac)
disp("3' UTR minimum length: ",utrOpts$minLength,"bps")
disp("3' UTR downstream: ",utrOpts$downstream,"bps")
}
if (!is.null(preset))
disp("Analysis preset: ",preset)
disp("Transcriptional level: ",transLevel)
if (!is.null(exonFilters)) {
disp("Exon filters: ",paste(names(exonFilters),collapse=", "))
for (ef in names(exonFilters)) {
disp(" ",ef,": ")
for (efp in names(exonFilters[[ef]])) {
if (length(exonFilters[[ef]][[efp]])==1 &&
is.function(exonFilters[[ef]][[efp]]))
print(exonFilters[[ef]][[efp]])
else if (length(exonFilters[[ef]][[efp]])==1)
disp(" ",paste(efp,exonFilters[[ef]][[efp]],sep=": "))
else if (length(exonFilters[[ef]][[efp]])>1)
disp(" ",paste(efp,paste(exonFilters[[ef]][[efp]],
collapse=", "),sep=": "))
}
}
}
else
disp("Exon filters: none applied")
if (!is.null(geneFilters)) {
disp("Gene filters: ",paste(names(geneFilters),collapse=", "))
for (gf in names(geneFilters)) {
disp(" ",gf,": ")
for (gfp in names(geneFilters[[gf]])) {
if (length(geneFilters[[gf]][[gfp]])==1 &&
is.function(geneFilters[[gf]][[gfp]]))
print(geneFilters[[gf]][[gfp]])
else if (length(geneFilters[[gf]][[gfp]])==1)
disp(" ",paste(gfp,geneFilters[[gf]][[gfp]],sep=": "))
else if (length(geneFilters[[gf]][[gfp]])>1)
disp(" ",paste(gfp,paste(geneFilters[[gf]][[gfp]],
collapse=", "),sep=": "))
}
}
}
else
disp("Gene filters: none applied")
disp("Filter application: ",whenApplyFilter)
disp("Normalization algorithm: ",normalization)
if (!is.null(normArgs)) {
disp("Normalization arguments: ")
for (na in names(normArgs)) {
if (length(normArgs[[na]])==1 && is.function(normArgs[[na]])) {
disp(" ",na,": ")
disp(as.character(substitute(normArgs[[na]])))
}
else if (length(normArgs[[na]])==1)
disp(" ",paste(na,normArgs[[na]],sep=": "))
else if (length(normArgs[[na]])>1)
disp(" ",paste(na,paste(normArgs[[na]],collapse=", "),
sep=": "))
}
}
if (!any(is.na(statistics)))
disp("Statistical algorithm(s): ",paste(statistics,collapse=", "))
else
disp("Statistical algorithm(s): no testing selected")
if (!is.null(statArgs)) {
if (!any(is.na(statistics))) {
disp("Statistical arguments: ")
for (sa in names(statArgs)) {
if (length(statArgs[[sa]])==1 && is.function(statArgs[[sa]])) {
disp(" ",sa,": ")
disp(as.character(substitute(statArgs[[na]])))
}
else if (length(statArgs[[sa]])==1)
disp(" ",paste(sa,statArgs[[sa]],sep=": "))
else if (length(statArgs[[sa]])>1)
disp(" ",paste(sa,paste(statArgs[[sa]],collapse=", "),
sep=": "))
}
}
else
disp("Statistical arguments: no testing selected")
}
disp("Meta-analysis method: ",metaP)
disp("Multiple testing correction: ",adjustMethod)
if (!is.na(pcut))
disp("p-value threshold: ",pcut)
disp("Logarithmic transformation offset: ",logOffset)
if (!is.null(preset))
disp("Analysis preset: ",preset)
disp("Quality control plots: ",paste(qcPlots,collapse=", "))
disp("Figure format: ",paste(figFormat,collapse=", "))
if (!is.na(exportWhere))
disp("Output directory: ",exportWhere)
disp("Output data: ",paste(exportWhat,collapse=", "))
disp("Output scale(s): ",paste(exportScale,collapse=", "))
disp("Output values: ",paste(exportValues,collapse=", "))
if ("stats" %in% exportWhat)
disp("Output statistics: ",paste(exportStats,collapse=", "),"\n")
if (is.function(.progressFun)) {
text <- paste("Starting the analysis...")
.progressFun(detail=text)
}
############################################################################
# geneData are always required, unless annotation is embedded, where we
# need only geneData as embedded annotation is not allowed anywhere else
disp("Loading gene annotation...")
if (!is.null(annotation) && annotation == "embedded") {
# The following should work if annotation elements are arranged
# in MeV-like data style
if (!is.data.frame(counts)) {
disp("Reading counts file ",countsName,"...")
geneCounts <- read.delim(counts)
}
else
geneCounts <- counts
rownames(geneCounts) <- as.character(geneCounts[,embedCols$idCol])
allCols <- seq_len(ncol(geneCounts))
samCols <- match(unlist(sampleList),colnames(geneCounts))
samCols <- samCols[which(!is.na(samCols))]
annCols <- allCols[-samCols]
geneData <- geneCounts[,annCols]
geneCounts <- geneCounts[,samCols]
colnames(geneData)[embedCols$idCol] <- "gene_id"
if (!is.na(embedCols$gcCol)) {
colnames(geneData)[embedCols$gcCol] <- "gc_content"
if (max(geneData$gc_content<=1)) # Is already divided
geneData$gc_content = 100*geneData$gc_content
}
if (!is.na(embedCols$nameCol))
colnames(geneData)[embedCols$nameCol] <- "gene_name"
if (!is.na(embedCols$btCol))
colnames(geneData)[embedCols$btCol] <- "biotype"
geneData <- GRanges(geneData)
names(geneData) <- geneData$gene_id
# Remove saturation if no biotypes
if ("saturation" %in% qcPlots && (is.na(embedCols$btCol)
|| length(unique(geneData$biotype)) == 1)) {
warnwrap("The creation of an saturation plots is available for ",
"more than one biotype!\nRemoving from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "saturation")]
}
}
else {
#sitaType <- ifelse(transLevel=="gene","gene",
# ifelse(transLevel=="transcript","transcript",
# ifelse(transLevel=="exon","exon")))
summarized <- ifelse(transLevel %in% c("exon","transcript"),TRUE,FALSE)
geneData <- tryCatch(loadAnnotation(org,refdb,level=transLevel,
type="gene",version=version,summarized=summarized,db=localDb,
rc=restrictCores),
# sitadela::loadAnnotation(org,refdb,type=sitaType,
#version=version,db=localDb,rc=restrictCores)
error=function(e) {
# Not found and user based
if (!is.null(annotation)) { # Has been checked
gtfFile <- annotation$gtf
metadata <- annotation
metadata$gtf <- NULL
geneData <- importCustomAnnotation(gtfFile,metadata,
"gene","gene")
#geneData <- sitadela::importCustomAnnotation(gtfFile,metadata,
# sitaType)
}
else
stop("Please provide an existing organism or a list ",
"with annotation metadata and GTF file!")
},finally="")
# Remove saturation if no biotypes
if ("saturation" %in% qcPlots && length(unique(geneData$biotype))==1) {
warnwrap("The creation of an saturation plots is available for ",
"more than one biotype!\nRemoving from figures list...")
qcPlots <- qcPlots[-which(qcPlots == "saturation")]
}
}
# Now start examining additional required data per case...
if (countType=="exon") {
# We need to load the exon annotation and see what counts have been
# provided
if (!fromPrevious) {
# Load exon annotation
disp("Loading exon annotation...")
#sitaType <- ifelse(transLevel=="gene","exon",
# ifelse(transLevel=="transcript","transexon",
# ifelse(transLevel=="exon","exon")))
exonData <- tryCatch(loadAnnotation(org,refdb,level=transLevel,
type=countType,version=version,db=localDb,summarized=TRUE,
rc=restrictCores),
#sitadela::loadAnnotation(org,refdb,type=sitaType,
#version=version,db=localDb,summarized=TRUE,rc=restrictCores)
error=function(e) {
# Not found and user based
if (!is.null(annotation)) { # Has been checked
gtfFile <- annotation$gtf
metadata <- annotation
metadata$gtf <- NULL
exonData <- importCustomAnnotation(gtfFile,metadata,
"gene","exon")
#exonData <- importCustomAnnotation(gtfFile,metadata,
# sitaType)
}
else
stop("Please provide an existing organism or a list ",
"with annotation metadata and GTF file!")
},finally="")
# Load/read counts
if (!is.null(counts)) { # Provided
if (!is.data.frame(counts) && !is.list(counts)) {
disp("Reading counts file ",countsName,"...")
exonCounts <- read.delim(counts,row.names=1)
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
exonCounts <- exonCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
else { # Already a data frame as input
if (is.character(counts[,1])) {
exonCounts <- as.matrix(counts[,-1])
rownames(exonCounts) <- as.character(counts[,1])
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
exonCounts <- exonCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
else { # Should be named!
if (is.null(rownames(counts)))
stopwrap("A counts data frame as input should ",
"have rownames!")
exonCounts <- counts
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
exonCounts <- exonCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
}
}
else { # Coming from read2count
if (fromRaw) { # Double check
r2c <- read2count(theList,exonData,fileType,transLevel,
utrOpts,rc=restrictCores)
exonCounts <- r2c$counts
# Merged exon data! - Not needed anymore
#exonData <- r2c$mergedann
if (is.null(libsizeList))
libsizeList <- r2c$libsize
if (exportCountsTable) {
exonDataExp <- as.data.frame(exonData)
exonDataExp <- exonDataExp[,c(1,2,3,6,7,5,8,9)]
names(exonDataExp)[1] <- "chromosome"
disp("Exporting raw read counts table to ",
file.path(PROJECT_PATH[["lists"]],
"raw_counts_table.txt.gz"))
resFile <- file.path(PROJECT_PATH[["lists"]],
"raw_counts_table.txt.gz")
gzfh <- gzfile(resFile,"w")
write.table(cbind(
exonDataExp[rownames(exonCounts),],
exonCounts
),gzfh,sep="\t",row.names=FALSE,quote=FALSE)
close(gzfh)
}
}
}
exonCounts <- exonCounts[,unlist(sampleList,use.names=FALSE)]
# Get the exon counts per gene model
disp("Checking chromosomes in exon counts and gene annotation...")
geneData <- .reduceGeneData(exonData[rownames(exonCounts)],geneData)
exonData <- .reduceGeneData(exonData[rownames(exonCounts)],exonData)
disp("Processing exons...")
theCounts <- constructGeneModel(exonCounts,exonData,type="exon",
rc=restrictCores)
if (saveGeneModel) {
disp("Saving gene model to ",file.path(PROJECT_PATH[["data"]],
"gene_model.RData"))
save(theCounts,exonData,geneData,sampleList,transLevel,
countType,file=file.path(PROJECT_PATH$data,
"gene_model.RData"),compress=TRUE)
}
}
else {
theCounts <- tmpEnv$theCounts
exonData <- tmpEnv$exonData
geneData <- tmpEnv$geneData
}
# Exclude any samples not wanted (when e.g. restoring a previous project
# and having determined that some samples are of bad quality OR using
# a different condition set with a subset of known samples
if (!is.null(excludeList) && !is.na(excludeList)) {
for (n in names(excludeList)) {
sampleList[[n]] <- setdiff(sampleList[[n]],
excludeList[[n]])
if (length(sampleList[[n]])==0) # Removed whole condition
sampleList[n] <- NULL
}
# Attributes are lost when subsetting
# https://cran.r-project.org/web/packages/sticky/
#vignettes/introduction.html
theCounts <- .excludeSamplesFromList(theCounts,
unlist(sampleList,use.names=FALSE))
}
if (!is.null(sampleListPrev) && !identical(sampleList,sampleListPrev))
theCounts <- .excludeSamplesFromList(theCounts,
unlist(sampleList,use.names=FALSE))
# Apply exon filters
if (!is.null(exonFilters)) {
exonFilterOut <- filterExons(theCounts,geneData,sampleList,
exonFilters)
exonFilterResult <- exonFilterOut$result
exonFilterFlags <- exonFilterOut$flags
}
else
exonFilterResult <- exonFilterFlags <- NULL
disp("Summarizing count data...")
theGeneCounts <- theExonLengths <- vector("list",
length(unlist(sampleList)))
names(theGeneCounts) <- names(theExonLengths) <- names(theCounts)
for (n in names(theGeneCounts))
theGeneCounts[[n]] <- vapply(theCounts[[n]],sum,numeric(1))
geneCounts <- do.call("cbind",theGeneCounts)
# Based on the sum of their exon lengths
lengthList <- attr(theCounts,"lengthList")
geneLength <- vapply(lengthList,sum,numeric(1))
# Could also be
# geneLength <- attr(exonData,"activeLength")
# In case there are small differences between annotation data and
# external file, due to e.g. slightly different Ensembl versions
geneData <- geneData[rownames(geneCounts)]
totalGeneData <- geneData # We need this for some total stats
}
if (countType=="utr") {
# We need to load the utr annotation and see what counts have been
# provided
if (!fromPrevious) {
disp("Loading 3' UTR annotation...")
#sitaType <- ifelse(transLevel=="gene","utr",
# ifelse(transLevel=="transcript","transutr",
# ifelse(transLevel=="exon","utr")))
transcriptData <- tryCatch(loadAnnotation(org,refdb,
level=transLevel,type=countType,version=version,db=localDb,
summarized=TRUE,rc=restrictCores),
# sitadela::loadAnnotation(org,refdb,
#type=sitaType,version=version,db=localDb,
#summarized=TRUE,rc=restrictCores)
error=function(e) {
# Not found and user based
if (!is.null(annotation)) { # Has been checked
gtfFile <- annotation$gtf
metadata <- annotation
metadata$gtf <- NULL
transcriptData <- importCustomAnnotation(gtfFile,metadata,
transLevel,countType)
#transcriptData <- sitadela::importCustomAnnotation(gtfFile,metadata,
# sitaType)
}
else
stop("Please provide an existing organism or a list with ",
"annotation metadata and GTF file!")
},finally="")
# Load/read counts
if (!is.null(counts)) { # Otherwise coming ready from read2count
if (!is.data.frame(counts) && !is.list(counts)) {
disp("Reading counts file ",countsName,"...")
transcriptCounts <- read.delim(counts, row.names=1)
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
transcriptCounts <- transcriptCounts[,unlist(sampleList,
use.names= FALSE)]
}
else { # Already a data frame as input
if (is.character(counts[,1])) {
transcriptCounts <- as.matrix(counts[,-1])
rownames(transcriptCounts) <- as.character(counts[,1])
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
transcriptCounts <- transcriptCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
else { # Should be named!
if (is.null(rownames(counts)))
stopwrap("A counts data frame as input should ",
"have rownames!")
transcriptCounts <- counts
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
transcriptCounts <- transcriptCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
}
}
else { # Coming from read2count
if (fromRaw) { # Double check
r2c <- read2count(theList,transcriptData,fileType,
transLevel,utrOpts,rc=restrictCores)
transcriptCounts <- r2c$counts
if (is.null(libsizeList))
libsizeList <- r2c$libsize
if (exportCountsTable) {
transcriptDataExp <- as.data.frame(transcriptData)
transcriptDataExp <-
transcriptDataExp[,c(1,2,3,6,7,5,8,9)]
names(transcriptDataExp)[1] <- "chromosome"
disp("Exporting raw read counts table to ",
file.path(PROJECT_PATH[["lists"]],
"raw_counts_table.txt.gz"))
resFile <- file.path(PROJECT_PATH[["lists"]],
"raw_counts_table.txt.gz")
gzfh <- gzfile(resFile,"w")
write.table(cbind(
transcriptDataExp[rownames(transcriptCounts),],
transcriptCounts
),gzfh,sep="\t",row.names=FALSE,quote=FALSE)
close(gzfh)
}
}
}
transcriptCounts <- transcriptCounts[,unlist(sampleList,
use.names=FALSE)]
# Get the transcript counts per gene model
disp("Checking chromosomes in transcript counts and gene ",
"annotation...")
geneData <-
.reduceGeneData(transcriptData[rownames(transcriptCounts)],
geneData)
transcriptData <-
.reduceGeneData(transcriptData[rownames(transcriptCounts)],
transcriptData)
disp("Processing transcripts...")
theCounts <- constructGeneModel(transcriptCounts,transcriptData,
type="utr",rc=restrictCores)
if (saveGeneModel) {
disp("Saving gene model to ",file.path(PROJECT_PATH[["data"]],
"gene_model.RData"))
save(theCounts,transcriptData,geneData,sampleList,countType,
transLevel,file=file.path(PROJECT_PATH$data,
"gene_model.RData"),compress=TRUE)
}
}
else {
theCounts <- tmpEnv$theCounts
transcriptData <- tmpEnv$transcriptData
geneData <- tmpEnv$geneData
}
# Exclude any samples not wanted (when e.g. restoring a previous project
# and having determined that some samples are of bad quality
if (!is.null(excludeList) && !is.na(excludeList)) {
for (n in names(excludeList)) {
sampleList[[n]] <- setdiff(sampleList[[n]],
excludeList[[n]])
if (length(sampleList[[n]])==0) # Removed whole condition
sampleList[n] <- NULL
}
#theCounts <- theCounts[unlist(sampleList)]
theCounts <- .excludeSamplesFromList(theCounts,
unlist(sampleList,use.names=FALSE))
}
disp("Summarizing count data...")
theGeneCounts <- theTranscriptLengths <- vector("list",
length(unlist(sampleList)))
names(theGeneCounts) <- names(theTranscriptLengths) <-
names(theCounts)
for (n in names(theGeneCounts))
theGeneCounts[[n]] <- vapply(theCounts[[n]],sum,numeric(1))
geneCounts <- do.call("cbind",theGeneCounts)
# Based on the sum of their transcript lengths
lengthList <- attr(theCounts,"lengthList")
geneLength <- vapply(lengthList,sum,numeric(1))
# Could also be
# geneLength <- attr(transcriptData,"activeLength")
# In case there are small differences between annotation data and
# external file, due to e.g. slightly different Ensembl versions
geneData <- geneData[rownames(geneCounts),]
totalGeneData <- geneData # We need this for some total stats
# No exon filters have been applied, we are at "utr" level
exonFilterResult <- NULL
}
if (countType=="gene") {
# geneData has already been loaded and also geneCounts in the case of
# embedded annotation
if (is.null(annotation) || annotation != "embedded") {
if (!fromPrevious) {
# Load/read counts
if (!is.null(counts)
&& (is.character(counts) || is.data.frame(counts))) {
if (!is.data.frame(counts)) { # Else it's already here
disp("Reading counts file ",countsName,"...")
geneCounts <- read.delim(counts,row.names=1)
## Re-arrange columns to be in consistency with colData
## used in statDeseq2
geneCounts <- geneCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
else { # Already a data frame as input
if (is.character(counts[,1])) {
geneCounts <- as.matrix(counts[,-1])
rownames(geneCounts) <- as.character(counts[,1])
## Re-arrange columns to be in consistency with
## colData used in statDeseq2
geneCounts <- geneCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
else { # Should be named!
if (is.null(rownames(counts)))
stopwrap("A counts data frame as input should ",
"have rownames!")
geneCounts <- counts
## Re-arrange columns to be in consistency with
## colData used in statDeseq2
geneCounts <- geneCounts[,unlist(sampleList,
use.names=FALSE),drop=FALSE]
}
}
}
else { # Coming from read2count
if (fromRaw) { # Double check
r2c <- read2count(theList,geneData,fileType,transLevel,
utrOpts,rc=restrictCores)
geneCounts <- r2c$counts
if (is.null(libsizeList))
libsizeList <- r2c$libsize
if (exportCountsTable) {
geneDataExp <- as.data.frame(geneData)
geneDataExp <- geneDataExp[,c(1,2,3,6,7,5,8,9)]
names(geneDataExp)[1] <- "chromosome"
disp("Exporting raw read counts table to ",
file.path(PROJECT_PATH[["lists"]],
"raw_counts_table.txt.gz"))
resFile <- file.path(PROJECT_PATH[["lists"]],
"raw_counts_table.txt.gz")
gzfh <- gzfile(resFile,"w")
write.table(cbind(
geneDataExp[rownames(geneCounts),],
geneCounts
),gzfh,sep="\t",row.names=FALSE,quote=FALSE)
close(gzfh)
}
}
}
}
else {
geneCounts <- tmpEnv$geneCounts
geneData <- tmpEnv$geneData
}
}
totalGeneData <- geneData # We need this for some total stats
exonFilterResult <- NULL
if (transLevel == "gene")
gValid <- intersect(rownames(geneCounts),geneData$gene_id)
else if (transLevel == "transcript")
gValid <- intersect(rownames(geneCounts),geneData$transcript_id)
else if (transLevel == "exon")
gValid <- intersect(rownames(geneCounts),geneData$exon_id)
geneCounts <- geneCounts[gValid,]
geneData <- geneData[rownames(geneCounts)]
geneLength <- width(geneData)
names(geneLength) <- names(geneData)
if (saveGeneModel) {
disp("Saving gene model to ",file.path(PROJECT_PATH[["data"]],
"gene_model.RData"))
save(geneCounts,geneData,sampleList,countType,
file=file.path(PROJECT_PATH$data,"gene_model.RData"),
compress=TRUE)
}
# Exclude any samples not wanted (when e.g. restoring a previous project
# and having determined that some samples are of bad quality
if (!is.null(excludeList) && !is.na(excludeList)) {
for (n in names(excludeList)) {
sampleList[[n]] <- setdiff(sampleList[[n]],
excludeList[[n]])
if (length(sampleList[[n]])==0) # Removed whole condition
sampleList[n] <- NULL
}
geneCounts <- geneCounts[,unlist(sampleList,use.names=FALSE)]
}
}
# Transform GC-content and biotype - should never be required in the new
# annotation strategy
if (is.null(geneData$gc_content))
geneData$gc_content <- rep(0.5,length(geneData))
if (is.null(geneData$biotype))
geneData$biotype <- rep("gene",length(geneData))
names(geneLength) <- rownames(geneCounts)
attr(geneData,"geneLength") <- geneLength
############################################################################
# BEGIN FILTERING SECTION
############################################################################
if (is.function(.progressFun)) {
text <- paste("Filtering...")
.progressFun(detail=text)
}
# GC bias is NOT alleviated if we do not remove the zeros!!!
disp("Removing genes with zero counts in all samples...")
theZeros <- which(apply(geneCounts,1,filterLow,0))
if (length(theZeros) > 0) {
# Store the filtered, maybe we do some stats
geneCountsZero <- geneCounts[theZeros,,drop=FALSE]
geneDataZero <- geneData[theZeros]
attr(geneDataZero,"geneLength") <- geneLength[theZeros]
theZeroNames <- names(geneData)[theZeros]
# Then remove
geneCounts <- geneCounts[-theZeros,,drop=FALSE]
geneData <- geneData[-theZeros]
attr(geneData,"geneLength") <- geneLength[-theZeros]
}
else
geneCountsZero <- geneDataZero <- theZeroNames <- NULL
# Store un-normalized gene counts for export purposes
geneCountsUnnorm <- geneCounts
# Apply filtering prior to normalization if desired
if (whenApplyFilter=="prenorm") {
# However, a first round of normalization has to be performed in
# order to get proper expression filters
disp("Prefiltering normalization with: ",normalization)
switch(normalization,
edaseq = {
tempGenes <- normalizeEdaseq(geneCounts,sampleList,
normArgs,geneData,output="matrix")
},
deseq = {
tempGenes <- normalizeDeseq(geneCounts,sampleList,normArgs,
output="matrix")
},
deseq2 = {
tempGenes <- normalizeDeseq2(geneCounts,sampleList,normArgs,
output="matrix")
},
edger = {
tempGenes <- normalizeEdger(geneCounts,sampleList,normArgs,
output="matrix")
},
noiseq = {
tempGenes <- normalizeNoiseq(geneCounts,sampleList,
normArgs,geneData,logOffset,output="matrix")
},
nbpseq = {
tempGenes <- normalizeNbpseq(geneCounts,sampleList,
normArgs,libsizeList,output="matrix")
},
absseq = {
tempGenes <- normalizeAbsseq(geneCounts,sampleList,normArgs,
output="matrix")
},
dss = {
tempGenes <- normalizeDss(geneCounts,sampleList,normArgs,
output="native")
},
none = {
# In case some external normalization is applied (e.g. equal
# read counts from all samples)
tempGenes <- geneCounts
}
)
# Now filter
if (!is.null(geneFilters)) {
geneFilterOut <- filterGenes(tempGenes,geneData,geneFilters,
sampleList)
geneFilterResult <- geneFilterOut$result
geneFilterCutoff <- geneFilterOut$cutoff
geneFilterFlags <- geneFilterOut$flags
}
else
geneFilterResult <- geneFilterCutoff <- geneFilterFlags <- NULL
# Unify the filters and filter
theDeadGenes <- list(
geneFilterResult$expression$median,
geneFilterResult$expression$mean,
geneFilterResult$expression$quantile,
geneFilterResult$expression$known,
geneFilterResult$expression$custom
)
theDead <- unique(unlist(c(geneFilterResult,exonFilterResult)))
# Some genes filtered by zero, were present in exon filters, not yet
# applied
if (countType=="exon")
theDead <- setdiff(theDead,theZeroNames)
# All method specific objects are row-index subsettable
if (length(theDead)>0) {
# Store the filtered for later export or some stats
geneCountsDead <- geneCounts[theDead,]
geneCountsUnnorm <- geneCountsUnnorm[theDead,]
geneDataDead <- geneData[theDead]
attr(geneDataDead,"geneLength") <- attr(geneData,
"geneLength")[theDead]
# Now filter
theDeadInd <- match(theDead,rownames(geneCounts))
geneCountsExpr <- geneCounts[-theDeadInd,]
geneDataExpr <- geneData[-theDeadInd]
attr(geneDataExpr,"geneLength") <- attr(geneData,
"geneLength")[-theDeadInd]
}
else {
geneCountsExpr <- geneCounts
geneDataExpr <- geneData
geneCountsDead <- geneDataDead <- geneCountsUnnorm <- NULL
}
if (is.function(.progressFun)) {
text <- paste("Normalizing...")
.progressFun(detail=text)
}
disp("Normalizing with: ",normalization)
switch(normalization,
edaseq = {
normGenes <- normalizeEdaseq(geneCountsExpr,sampleList,normArgs,
geneDataExpr,output="matrix")
},
deseq = {
normGenes <- normalizeDeseq(geneCountsExpr,sampleList,normArgs,
output="native")
},
deseq2 = {
normGenes <- normalizeDeseq2(geneCountsExpr,sampleList,normArgs,
output="native")
},
edger = {
normGenes <- normalizeEdger(geneCountsExpr,sampleList,normArgs,
output="native")
},
noiseq = {
normGenes <- normalizeNoiseq(geneCountsExpr,sampleList,normArgs,
geneDataExpr,logOffset,output="matrix")
},
nbpseq = {
normGenes <- normalizeNbpseq(geneCountsExpr,sampleList,normArgs,
libsizeList,output="native")
},
absseq = {
normGenes <- normalizeAbsseq(geneCountsExpr,sampleList,normArgs,
output="native")
},
dss = {
normGenes <- normalizeDss(geneCountsExpr,sampleList,normArgs,
output="native")
},
none = {
normGenes <- geneCountsExpr
}
)
normGenesExpr <- normGenes
}
else if (whenApplyFilter=="postnorm") {
if (is.function(.progressFun)) {
text <- paste("Normalizing...")
.progressFun(detail=text)
}
# Apply filtering after normalization if desired (default)
disp("Normalizing with: ",normalization)
switch(normalization,
edaseq = {
normGenes <- normalizeEdaseq(geneCounts,sampleList,
normArgs,geneData,output="matrix")
},
deseq = {
normGenes <- normalizeDeseq(geneCounts,sampleList,normArgs,
output="native")
},
deseq2 = {
normGenes <- normalizeDeseq2(geneCounts,sampleList,
normArgs,output="native")
},
edger = {
normGenes <- normalizeEdger(geneCounts,sampleList,normArgs,
output="native")
},
noiseq = {
normGenes <- normalizeNoiseq(geneCounts,sampleList,
normArgs,geneData,logOffset,output="matrix")
},
nbpseq = {
normGenes <- normalizeNbpseq(geneCounts,sampleList,
normArgs,libsizeList,output="native")
},
absseq = {
normGenes <- normalizeAbsseq(geneCounts,sampleList,
normArgs,output="native")
},
dss = {
normGenes <- normalizeDss(geneCounts,sampleList,normArgs,
output="native")
},
none = {
normGenes <- geneCounts
}
)
switch(class(normGenes)[1],
.CountDataSet = { # Has been normalized with DESeq
#tempMatrix <- round(DESeq::counts(normGenes,normalized=TRUE))
tempMatrix <- round(counts(normGenes,normalized=TRUE))
},
DESeqDataSet = { # Has been normalized with DESeq2
tempMatrix <- round(DESeq2::counts(normGenes,normalized=TRUE))
},
DGEList = { # Has been normalized with edgeR
# Trick found at http://cgrlucb.wikispaces.com/edgeR+spring2013
scl <- normGenes$samples$lib.size *
normGenes$samples$norm.factors
tempMatrix <- round(t(t(normGenes$counts)/scl)*mean(scl))
},
matrix = { # Has been normalized with EDASeq or NOISeq or nothing
tempMatrix <- normGenes
},
data.frame = { # Has been normalized with or nothing
tempMatrix <- as.matrix(normGenes)
},
nbData = { # Has been normalized with NBPSeq and main was "nbpseq"
tempMatrix <- as.matrix(round(sweep(normGenes$counts,2,
normGenes$norm.factors,"*")))
},
nbp = { # Has been normalized with NBPSeq and main was "nbsmyth"
tempMatrix <- as.matrix(round(normGenes$pseudo.counts))
},
ABSDataSet = { # Has been normalized with ABSSeq
tempMatrix <- as.matrix(round(excounts(normGenes)))
},
SeqCountSet = { # Has been normalized with DSS
# Dribble for taking a mtx out of SeqCountSet class
classes <- asClassVector(sampleList)
theDesign <- data.frame(condition=classes,
row.names=colnames(normGenes))
cds <- newCountDataSet(as.matrix(round(
assayData(normGenes)$exprs)),theDesign$condition)
#DESeq::sizeFactors(cds) <- normalizationFactor(normGenes)
sizeFactors(cds) <- normalizationFactor(normGenes)
#tempMatrix <- as.matrix(round(DESeq::counts(cds,
# normalized=TRUE)))
tempMatrix <- as.matrix(round(counts(cds,normalized=TRUE)))
}
)
# Implement gene filters after normalization
if (!is.null(geneFilters)) {
geneFilterOut <- filterGenes(tempMatrix,geneData,geneFilters,
sampleList)
geneFilterResult <- geneFilterOut$result
geneFilterCutoff <- geneFilterOut$cutoff
geneFilterFlags <- geneFilterOut$flags
}
else
geneFilterResult <- geneFilterCutoff <-
geneFilterFlags <- NULL
# Unify the filters and filter
theDeadGenes <- list(
geneFilterResult$expression$median,
geneFilterResult$expression$mean,
geneFilterResult$expression$quantile,
geneFilterResult$expression$known,
geneFilterResult$expression$custom
)
#when running maually exonFilterResult=NULL
theDead <- unique(unlist(c(geneFilterResult,exonFilterResult)))
# Some genes filtered by zero, were present in exon filters, not yet
# applied
if (countType=="exon")
theDead <- setdiff(theDead,theZeroNames)
# All method specific objects are row-index subsettable
if (length(theDead) > 0) {
# Store the filtered for later export or some stats
geneCountsDead <- tempMatrix[theDead,]
geneCountsUnnorm <- geneCountsUnnorm[theDead,]
geneDataDead <- geneData[theDead]
attr(geneDataDead,"geneLength") <- attr(geneData,
"geneLength")[theDead]
# Now filter
theDeadInd <- match(theDead,rownames(tempMatrix))
switch(class(normGenes)[1],
.CountDataSet = { # Has been normalized with DESeq
normGenesExpr <- normGenes[-theDeadInd,]
},
DESeqDataSet = { # Has been normalized with DESeq2
normGenesExpr <- normGenes[-theDeadInd,]
},
DGEList = { # edgeR bug???
normGenesExpr <- normGenes[-theDeadInd,]
normGenesExpr$AveLogCPM <-
normGenesExpr$AveLogCPM[-theDeadInd]
},
matrix = { # Has been normalized with EDASeq or NOISeq
normGenesExpr <- normGenes[-theDeadInd,]
},
data.frame = { # Has been normalized with EDASeq or NOISeq
normGenesExpr <- as.matrix(normGenes[-theDeadInd,])
},
nbData = { # Has been normalized NBPSeq, main.method="nbpseq"
normGenesExpr <- normGenes
normGenesExpr$counts <-
as.matrix(normGenesExpr$counts[-theDeadInd,])
normGenesExpr$rel.frequencies <-
normGenesExpr$rel.frequencies[-theDeadInd,]
normGenesExpr$tags <-
as.matrix(normGenesExpr$tags[-theDeadInd,])
},
nbp = {
normGenesExpr <- normGenes
normGenesExpr$counts <-
as.matrix(normGenesExpr$counts[-theDeadInd,])
normGenesExpr$pseudo.counts <-
as.matrix(normGenesExpr$pseudo.counts[-theDeadInd,])
normGenesExpr$pseudo.libSizes <-
colSums(as.matrix(normGenesExpr$pseudo.counts))*
rep(1,dim(normGenesExpr$counts)[2])
},
ABSDataSet = {
normGenesExpr <- normGenes
ABSSeq::counts(normGenesExpr) <-
ABSSeq::counts(normGenesExpr)[-theDeadInd,]
excounts(normGenesExpr) <-
excounts(normGenesExpr)[-theDeadInd,]
},
SeqCountSet = {
normGenesExpr <- normGenes
# Subset raw counts table
normGenesExpr <- normGenesExpr[-theDeadInd,]
# All the other tools here return a matrix. DSS must do the
# same. --> otherwise error in rbind of line 3192
classes <- asClassVector(sampleList)
theDesign <- data.frame(condition=classes,
row.names=colnames(normGenesExpr))
cds <- newCountDataSet(as.matrix(round(
assayData(normGenesExpr)$exprs)),theDesign$condition)
#DESeq::sizeFactors(cds) <-
# normalizationFactor(normGenesExpr)
sizeFactors(cds) <- normalizationFactor(normGenesExpr)
#normGenesExpr <- as.matrix(round(DESeq::counts(cds,
# normalized=TRUE)))
normGenesExpr <- as.matrix(round(counts(cds,
normalized=TRUE)))
}
)
geneCountsExpr <- geneCounts[rownames(normGenesExpr),]
geneDataExpr <- geneData[-theDeadInd]
attr(geneDataExpr,"geneLength") <-
attr(geneData,"geneLength")[-theDeadInd]
}
else {
normGenesExpr <- normGenes
geneCountsExpr <- geneCounts
geneDataExpr <- geneData
geneCountsDead <- geneDataDead <- geneCountsUnnorm <- NULL
}
}
# Store the final filtered, maybe we do some stats
geneDataFiltered <- c(geneDataZero,geneDataDead)
if (!is.null(geneDataFiltered) && length(geneDataFiltered) > 0) {
disp(length(geneDataFiltered)," genes filtered out")
if (!is.null(geneDataZero) && length(geneDataZero) > 0)
attr(geneDataFiltered,"geneLength") <- c(attr(geneDataZero,
"geneLength"),attr(geneDataDead,"geneLength"))
else
attr(geneDataFiltered,"geneLength") <-
attr(geneDataDead,"geneLength")
}
if (!is.null(geneFilters) || !is.null(exonFilters))
disp(length(geneDataExpr)," genes remain after filtering")
############################################################################
# END FILTERING SECTION
############################################################################
# There is a small case that no genes are left after filtering...
if(any(dim(normGenesExpr)==0))
stopwrap("No genes left after gene and/or exon filtering! Try again ",
"with no filtering or less strict filter rules...")
if (!is.null(contrast) && !any(is.na(statistics))) {
if (is.function(.progressFun)) {
text <- paste("Statistical testing...")
.progressFun(detail=text)
}
# Run the statistical test, normGenes is always a method-specific
# object, handled in the metaseqr.stat.R stat.* functions
cpList <- vector("list",length(contrast))
names(cpList) <- contrast
contrastList <- makeContrastList(contrast,sampleList)
for (n in names(cpList)) {
cpList[[n]] <- vector("list",length(statistics))
names(cpList[[n]]) <- statistics
}
for (alg in statistics) {
disp("Running statistical tests with: ",alg)
switch(alg,
deseq = {
pList <- statDeseq(normGenesExpr,sampleList,contrastList,
statArgs[[alg]])
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
deseq2 = {
pList <- statDeseq2(normGenesExpr,sampleList,contrastList,
statArgs[[alg]])
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
edger = {
pList <- statEdger(normGenesExpr,sampleList,contrastList,
statArgs[[alg]])
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
noiseq = {
pList <- statNoiseq(normGenesExpr,sampleList,contrastList,
statArgs[[alg]],geneDataExpr,logOffset)
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
#bayseq = {
# pList <- statBayseq(normGenesExpr,sampleList,contrastList,
# statArgs[[alg]],libsizeList)
# if (!is.na(pcut)) {
# for (con in names(contrastList))
# disp(" Contrast ",con,": found ",
# length(which(pList[[con]]<=pcut))," genes")
# }
#},
limma = {
pList <- statLimma(normGenesExpr,sampleList,contrastList,
statArgs[[alg]])
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
nbpseq = {
pList <- statNbpseq(normGenesExpr,sampleList,contrastList,
statArgs[[alg]],libsizeList)
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
absseq = {
pList <- statAbsseq(normGenesExpr,sampleList,contrastList,
statArgs[[alg]])
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
},
dss = {
pList <- statDss(normGenesExpr,sampleList,contrastList,
statArgs[[alg]])
# Order pList genes as in normGenesExpr, because dss outputs
# gene names in a different order
pList[[1]] <- pList[[1]][rownames(normGenesExpr)]
if (!is.na(pcut)) {
for (con in names(contrastList))
disp(" Contrast ",con,": found ",
length(which(pList[[con]]<=pcut))," genes")
}
}
)
for (n in names(pList))
cpList[[n]][[alg]] <- pList[[n]]
}
for (n in names(cpList))
cpList[[n]] <- do.call("cbind",cpList[[n]])
# Create the adjusted p-value matrices (if needed)
if ("adj_p_value" %in% exportWhat) {
adjCpList <- cmclapply(cpList,
function(x,a) return(apply(x,2,p.adjust,a)),adjustMethod,
rc=restrictCores)
for (n in names(cpList)) {
noi <- grep("noiseq",colnames(cpList[[n]]))
if (length(noi)>0) {
# DESeq has not run in this case, FDR cannot be calculated
if (length(strsplit(n,"_vs_")[[1]])==2)
adjCpList[[n]][,noi] <- rep(NA,nrow(cpList[[n]]))
}
}
}
else
adjCpList <- NULL
}
else
# No contrast provided or no staistical tests selected, some dummy
# p-value data must exist for export
cpList <- adjCpList <- NULL
if (!is.null(contrast)) # In case of only statistics=NA
contrastList <- makeContrastList(contrast,sampleList)
# At this point, all method-specific objects must become matrices for
# exporting and plotting
switch(class(normGenesExpr)[1],
.CountDataSet = { # Has been processed with DESeq
#normGenes <- round(DESeq::counts(normGenes,normalized=TRUE))
normGenes <- round(counts(normGenes,normalized=TRUE))
#normGenesExpr <- round(DESeq::counts(normGenesExpr,
# normalized=TRUE))
normGenesExpr <- round(counts(normGenesExpr,normalized=TRUE))
},
DESeqDataSet = { # Has been processed with DESeq2
normGenes <- round(DESeq2::counts(normGenes,normalized=TRUE))
normGenesExpr <- round(DESeq2::counts(normGenesExpr,
normalized=TRUE))
},
DGEList = { # Has been processed with edgeR
# Trick found at http://cgrlucb.wikispaces.com/edgeR+spring2013
sclR <- normGenes$samples$lib.size*normGenes$samples$norm.factors
normGenes <- round(t(t(normGenes$counts)/sclR)*mean(sclR))
sclN <- normGenesExpr$samples$lib.size *
normGenesExpr$samples$norm.factors
normGenesExpr <- round(t(t(normGenesExpr$counts)/sclN) *
mean(sclN))
},
nbData = {
normGenes <- as.matrix(round(sweep(normGenes$counts,2,
normGenes$norm.factors,"*")))
normGenesExpr <- as.matrix(round(sweep(normGenesExpr$counts,2,
normGenes$norm.factors,"*")))
},
nbp = {
normGenes <- as.matrix(round(normGenes$pseudo.counts))
normGenesExpr <- as.matrix(round(normGenesExpr$pseudo.counts))
},
ABSDataSet = {
normGenes <- as.matrix(round(excounts(normGenes)))
normGenesExpr <- as.matrix(round(excounts(normGenesExpr)))
},
SeqCountSet = {
# Way to take normalized counts out of DSS as a matrix
classes <- asClassVector(sampleList)
theDesign <- data.frame(condition=classes,
row.names=colnames(normGenes))
cds <- newCountDataSet(as.matrix(round(assayData(normGenes)$exprs)),
theDesign$condition)
#DESeq::sizeFactors(cds) <- normalizationFactor(normGenes)
sizeFactors(cds) <- normalizationFactor(normGenes)
#normGenes <- as.matrix(round(DESeq::counts(cds,normalized=TRUE)))
normGenes <- as.matrix(round(counts(cds,normalized=TRUE)))
cdsExpr <- newCountDataSet(as.matrix(round(assayData(
normGenesExpr)$exprs)),theDesign$condition)
#DESeq::sizeFactors(cdsExpr) <- normalizationFactor(normGenesExpr)
sizeFactors(cdsExpr) <- normalizationFactor(normGenesExpr)
#normGenesExpr <- as.matrix(round(DESeq::counts(cdsExpr,
# normalized=TRUE)))
normGenesExpr <- as.matrix(round(counts(cdsExpr,
normalized=TRUE)))
}
# We don't need the matrix case
)
# Now that everything is a matrix, export the normalized counts if asked
if (exportCountsTable) {
geneDataExprExp <- as.data.frame(geneDataExpr)
geneDataExprExp <- geneDataExprExp[,c(1,2,3,6,7,5,8,9)]
colnames(geneDataExprExp)[1] <- "chromosome"
geneDataFilteredExp <- as.data.frame(geneDataFiltered)
geneDataFilteredExp <- geneDataFilteredExp[,c(1,2,3,6,7,5,8,9)]
colnames(geneDataFilteredExp)[1] <- "chromosome"
disp("Exporting and compressing normalized read counts table to ",
file.path(PROJECT_PATH[["lists"]],"normalized_counts_table.txt"))
expo <- cbind(
rbind(geneDataExprExp,geneDataFilteredExp),
rbind(normGenesExpr,geneCountsZero,geneCountsDead)
)
resFile <- file.path(PROJECT_PATH[["lists"]],
"normalized_counts_table.txt.gz")
gzfh <- gzfile(resFile,"w")
write.table(expo,gzfh,sep="\t",row.names=FALSE,quote=FALSE)
close(gzfh)
}
# Calculate meta-statistics, if more than one statistical algorithm has been
# used
if (!is.null(contrast) && !any(is.na(statistics))) {
if (length(statistics)>1) {
sumpList <- metaTest(
cpList=cpList,
metaP=metaP,
counts=normGenesExpr,
sampleList=sampleList,
statistics=statistics,
statArgs=statArgs,
libsizeList=libsizeList,
nperm=nperm,
weight=weight,
pOffset=pOffset,
rc=restrictCores
)
}
# We assign the p-values from the only statistic used to sumpList in
# order to use it for stat plots
else
sumpList <- cpList
# Useless for one statistics but just for safety
if ("adj_meta_p_value" %in% exportWhat)
adjSumpList <- cmclapply(sumpList,
function(x,a) return(p.adjust(x,a)),adjustMethod,
rc=restrictCores)
else
adjSumpList <- NULL
}
else
sumpList <- adjSumpList <- NULL
############################################################################
# EXPORT SECTION
############################################################################
if (is.function(.progressFun)) {
text <- paste("Exporting...")
.progressFun(detail=text)
}
# Bind all the flags
if (countType=="gene")
flags <- geneFilterFlags
else if (countType=="utr")
flags <- geneFilterFlags
else if (countType=="exon") {
if (!is.null(exonFilterFlags)) {
flags <- cbind(geneFilterFlags,
as.matrix(exonFilterFlags[rownames(geneFilterFlags),]))
nams <- c(colnames(geneFilterFlags),colnames(exonFilterFlags))
rownames(flags) <- rownames(geneFilterFlags)
colnames(flags) <- nams
}
else
flags <- geneFilterFlags
}
disp("Building output files...")
if (outList)
out <- makeExportList(contrast)
else
out <- NULL
if (report)
html <- makeExportList(contrast)
else
html <- NULL
if ("rpgm" %in% exportScale)
fa <- attr(geneData,"geneLength")
else
fa <- NULL
if ("normalized" %in% exportValues) {
fac <- fa[rownames(normGenesExpr)]
normList <- makeTransformation(normGenesExpr,exportScale,fac,logOffset)
}
else
normList <- NULL
if ("raw" %in% exportValues) {
fac <- fa[rownames(geneCountsExpr)]
rawList <- makeTransformation(geneCountsExpr,exportScale,fac,logOffset)
}
else
rawList <- NULL
if ("flags" %in% exportWhat)
goodFlags <- flags[rownames(normGenesExpr),]
else
goodFlags <- NULL
if (!is.null(geneCountsZero) || !is.null(geneCountsDead)) {
geneCountsFiltered <- rbind(geneCountsZero,geneCountsDead)
geneCountsUnnormFiltered <- rbind(geneCountsZero,
geneCountsUnnorm)
if ("normalized" %in% exportValues) {
fac <- fa[rownames(geneCountsFiltered)]
normListFiltered <- makeTransformation(geneCountsFiltered,
exportScale,fac,logOffset)
}
else
normListFiltered <- NULL
if ("raw" %in% exportValues) {
fac <- fa[rownames(geneCountsUnnormFiltered)]
rawListFiltered <- makeTransformation(
geneCountsUnnormFiltered,exportScale,fac,logOffset)
}
else
rawListFiltered <- NULL
if ("flags" %in% exportWhat && !is.null(flags))
allFlags <- rbind(
matrix(1,nrow(geneCountsZero),ncol(flags)),
flags[rownames(geneCountsDead),]
)
else
allFlags <- NULL
}
else {
geneCountsFiltered <- NULL
geneCountsUnnormFiltered <- NULL
allFlags <- NULL
}
# Export structure for SeqCVIBE
if (.exportR2C) {
actLen <- c(attr(geneDataExpr,"geneLength"),
attr(geneDataFiltered,"geneLength"))
names(actLen) <- c(names(geneDataExpr),names(geneDataFiltered))
disp("Saving more count data to ",file.path(PROJECT_PATH[["data"]],
"count_data.RData"))
.createR2CExport(
rbind(geneCountsExpr,geneCountsUnnormFiltered),
rbind(normGenesExpr,geneCountsFiltered),
c(geneDataExpr,geneDataFiltered),
actLen,
libsizeList,
file.path(PROJECT_PATH[["data"]],"count_data.RData")
)
}
if (!is.null(contrast)) {
reportTables <- vector("list",length(contrast))
names(reportTables) <- contrast
for (cnt in contrast) {
disp(" Contrast: ",cnt)
disp(" Adding non-filtered data...")
if (!any(is.na(statistics)))
theExport <- buildExport(
geneData=geneDataExpr,
rawGeneCounts=geneCountsExpr,
normGeneCounts=normGenesExpr,
flags=goodFlags,
sampleList=sampleList,
cnt=cnt,
statistics=statistics,
rawList=rawList,
normList=normList,
pMat=cpList[[cnt]],
adjpMat=adjCpList[[cnt]],
sumP=sumpList[[cnt]],
adjSumP=adjSumpList[[cnt]],
exportWhat=exportWhat,
exportScale=exportScale,
exportValues=exportValues,
exportStats=exportStats,
logOffset=logOffset,
#report=report
report=FALSE
)
else
theExport <- buildExport(
geneData=geneDataExpr,
rawGeneCounts=geneCountsExpr,
normGeneCounts=normGenesExpr,
flags=goodFlags,
sampleList=sampleList,
cnt=cnt,
rawList=rawList,
normList=normList,
exportWhat=exportWhat,
exportScale=exportScale,
exportValues=exportValues,
exportStats=exportStats,
logOffset=logOffset,
#report=report
report=FALSE
)
# Adjust the export based on what statistics have been done and a
# possible p-value cutoff
export <- theExport$textTable
colnames(export)[1] <- "chromosome"
if (report)
exportHtml <- theExport$htmlTable
if (!any(is.na(statistics))) { # Ordering based on p-value
if (!is.na(pcut)) {
if (length(statistics)>1) {
switch(metaP,
fisher = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
fperm = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
whitlock = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
dperm_min = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
dperm_max = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
dperm_weight = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
minp = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
maxp = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
weight = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
pandora = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
simes = {
cutInd <- which(sumpList[[cnt]]<=pcut)
},
none = {
cutInd <- which(sumpList[[cnt]]<=pcut)
}
)
pp <- sumpList[[cnt]][cutInd]
export <- export[cutInd,]
export <- export[order(pp),]
if (report) {
exportHtml <- exportHtml[cutInd,]
exportHtml <- exportHtml[order(pp),]
}
}
else {
cutInd <- which(sumpList[[cnt]]<=pcut)
pp <- sumpList[[cnt]][cutInd,]
export <- export[cutInd,]
export <- export[order(pp),]
if (report) {
exportHtml <- exportHtml[cutInd,]
exportHtml <- exportHtml[order(pp),]
}
}
}
else {
pp <- sumpList[[cnt]]
export <- export[order(pp),]
if (report)
exportHtml <- exportHtml[order(pp),]
}
}
# Final safety trigger
naInd <- grep("NA",rownames(export))
if (length(naInd)>0) {
export <- export[-naInd,]
if (report)
exportHtml <- exportHtml[-naInd,]
}
resFile <- file.path(PROJECT_PATH[["lists"]],
paste("metaseqr_sig_out_",cnt,".txt.gz",sep=""))
disp(" Writing output...")
gzfh <- gzfile(resFile,"w")
write.table(export,gzfh,quote=FALSE,row.names=FALSE,sep="\t")
close(gzfh)
if (outList)
out[[cnt]] <- export
if (!is.null(geneCountsZero) || !is.null(geneCountsDead)) {
disp(" Adding filtered data...")
theExportFiltered <- buildExport(
geneData=geneDataFiltered,
rawGeneCounts=geneCountsUnnormFiltered,
normGeneCounts=geneCountsFiltered,
flags=allFlags,
sampleList=sampleList,
cnt=cnt,
statistics=statistics,
rawList=rawListFiltered,
normList=normListFiltered,
exportWhat=exportWhat,
exportScale=exportScale,
exportValues=exportValues,
exportStats=exportStats,
logOffset=logOffset,
report=FALSE
)
# Now we should be having theExport and theExportFiltered. We do
# not generate html output for filtered or total results just a
# compressed text file. We thus have to append
# theExport$textTable and theExportFiltered$htmlTable before
# writing the final output...
exportAll <- rbind(theExport$textTable,
theExportFiltered$textTable)
# ...and order them somehow... alphabetically according to row
# names, as the annotation might not have been bundled...
exportAll <- exportAll[order(rownames(exportAll)),]
colnames(exportAll)[1] <- "chromosome"
# Here, both filtered and unfiltered genes are passed to the
# output list.
if (outList)
out[[cnt]] <- exportAll
resFile <- file.path(PROJECT_PATH[["lists"]],paste(
"metaseqr_all_out_",cnt,".txt.gz",sep=""))
disp(" Writing output...")
gzfh <- gzfile(resFile,"w")
write.table(exportAll,gzfh,quote=FALSE,row.names=FALSE,sep="\t")
close(gzfh)
}
# If report requested, build a more condensed summary table, while
# the complete tables are available for download
if (report) {
if (!any(is.na(statistics))) {
if (length(statistics) > 1) {
ew <- c("annotation","meta_p_value","adj_meta_p_value",
"fold_change","stats")
if (is.null(adjSumpList))
adjSumpList <- cmclapply(sumpList,
function(x,a) return(p.adjust(x,a)),
adjustMethod,rc=restrictCores)
}
else {
ew <- c("annotation","p_value","adj_p_value",
"fold_change","stats")
if (is.null(adjCpList))
adjCpList <- cmclapply(cpList,
function(x,a) return(p.adjust(x,a)),
adjustMethod,rc=restrictCores)
}
}
else
ew <- c("annotation","fold_change","stats")
esc <- "rpgm"
ev <- "normalized"
est <- "mean"
faR <- attr(geneDataExpr,"geneLength")
facR <- faR[rownames(normGenesExpr)]
normListR <- makeTransformation(normGenesExpr,esc,facR,
logOffset)
disp(" Adding report data...")
if (!any(is.na(statistics))) {
reportTables[[cnt]] <- buildExport(
geneData=geneDataExpr,
rawGeneCounts=geneCountsExpr,
normGeneCounts=normGenesExpr,
flags=goodFlags,
sampleList=sampleList,
cnt=cnt,
statistics=statistics,
rawList=NULL,
normList=normListR,
pMat=cpList[[cnt]],
adjpMat=as.matrix(adjCpList[[cnt]]),
sumP=sumpList[[cnt]],
adjSumP=adjSumpList[[cnt]],
exportWhat=ew,
exportScale=esc,
exportValues=ev,
exportStats=est,
logOffset=logOffset,
report=FALSE
)$textTable
if (is.matrix(pp))
npp <- rownames(pp)
else
npp <- names(pp)
reportTables[[cnt]] <-
reportTables[[cnt]][npp[order(pp)],,drop=FALSE]
if (!is.null(reportTop)) {
plasm <- pcut
if (is.na(pcut))
plasm <- 0.05
topi <- ceiling(reportTop*length(which(pp<=plasm)))
if (topi == 0)
topi <- 10
#topi <- ceiling(reportTop*nrow(reportTables[[cnt]]))
reportTables[[cnt]] <-
reportTables[[cnt]][seq_len(topi),,drop=FALSE]
}
}
else {
reportTables[[cnt]] <- buildExport(
geneData=geneDataExpr,
rawGeneCounts=geneCountsExpr,
normGeneCounts=normGenesExpr,
flags=goodFlags,
sampleList=sampleList,
cnt=cnt,
rawList=NULL,
normList=normListR,
exportWhat=ew,
exportScale=esc,
exportValues=ev,
exportStats=est,
logOffset=logOffset,
report=FALSE
)$textTable
if (!is.null(reportTop)) {
ii <- grep("fold_change_",colnames(reportTables[[cnt]]))
forOrder <- as.numeric(reportTables[[cnt]][,ii[1]])
oo <- sort(abs(forOrder),decreasing=TRUE,
index.return=TRUE)
topi <- ceiling(reportTop*length(oo$ix))
if (topi == 0)
topi <- 10
reportTables[[cnt]] <-
reportTables[[cnt]][oo$ix[seq_len(topi)],,
drop=FALSE]
}
}
}
}
}
else {
disp(" No contrast was specified, exporting only quantifications")
disp(" Adding non-filtered data...")
theExport <- buildExport(
geneData=geneDataExpr,
rawGeneCounts=geneCountsExpr,
normGeneCounts=normGenesExpr,
flags=goodFlags,
sampleList=sampleList,
statistics=statistics,
rawList=rawList,
normList=normList,
exportWhat=exportWhat,
exportScale=exportScale,
exportValues=exportValues,
exportStats=exportStats,
logOffset=logOffset,
report=FALSE
)
export <- theExport$textTable
colnames(export)[1] <- "chromosome"
if (report)
exportHtml <- theExport$htmlTable
naInd <- grep("NA",rownames(export))
if (length(naInd)>0) {
export <- export[-naInd,]
if (report)
exportHtml <- exportHtml[-naInd,]
}
resFile <- file.path(PROJECT_PATH[["lists"]],"metaseqr_sig_out.txt.gz")
disp(" Writing output...")
gzfh <- gzfile(resFile,"w")
write.table(export,gzfh,quote=FALSE,row.names=FALSE,sep="\t")
close(gzfh)
if (outList)
out[[cnt]] <- export
if (!is.null(geneCountsZero) || !is.null(geneCountsDead)) {
disp(" Adding filtered data...")
theExportFiltered <- buildExport(
geneData=geneDataFiltered,
rawGeneCounts=geneCountsUnnormFiltered,
normGeneCounts=geneCountsFiltered,
flags=allFlags,
sampleList=sampleList,
statistics=statistics,
rawList=rawListFiltered,
normList=normListFiltered,
exportWhat=exportWhat,
exportScale=exportScale,
exportValues=exportValues,
exportStats=exportStats,
logOffset=logOffset,
report=FALSE
)
# Now we should be having theExport and theExportFiltered. We do
# not generate html output for filtered or total results just a
# compressed text file. We thus have to append
# theExport$textTable and theExportFiltered$htmlTable before
# writing the final output...
exportAll <- rbind(theExport$textTable,
theExportFiltered$textTable)
# ...and order them somehow... alphabetically according to row
# names, as the annotation might not have been bundled...
exportAll <- exportAll[order(rownames(exportAll)),]
colnames(exportAll)[1] <- "chromosome"
# Here, both filtered and unfiltered genes are passed to the
# output list.
if (outList)
out[[cnt]] <- exportAll
resFile <- file.path(PROJECT_PATH[["lists"]],
"metaseqr_all_out.txt.gz")
disp(" Writing output...")
gzfh <- gzfile(resFile,"w")
write.table(exportAll,gzfh,quote=FALSE,row.names=FALSE,sep="\t")
close(gzfh)
}
# If report requested, build a more condensed summary table, while
# the complete tables are available for download
if (report) {
ew <- c("annotation","stats")
esc <- "rpgm"
ev <- "normalized"
est <- "mean"
faR <- attr(geneDataExpr,"geneLength")
facR <- faR[rownames(normGenesExpr)]
normListR <- makeTransformation(normGenesExpr,esc,facR,
logOffset)
disp(" Adding report data...")
reportTables <- list(NT=buildExport(
geneData=geneDataExpr,
rawGeneCounts=geneCountsExpr,
normGeneCounts=normGenesExpr,
flags=goodFlags,
sampleList=sampleList,
statistics=statistics,
rawList=NULL,
normList=normListR,
exportWhat=ew,
exportScale=esc,
exportValues=ev,
exportStats=est,
logOffset=logOffset,
report=FALSE
)$textTable)
# Some kind of toping for the table must be done, most probably
# based on top expressed genes
if (!is.null(reportTop)) {
ii <- grep("rpgm",colnames(reportTables[[1]]))
forOrder <- as.matrix(reportTables[[1]][,ii])
avgRpgm <- apply(forOrder,1,mean,na.rm=TRUE)
oo <- sort(avgRpgm,decreasing=TRUE,index.return=TRUE)
topi <- ceiling(reportTop*length(oo$ix))
if (topi == 0)
topi <- 10
reportTables[[1]] <-
reportTables[[1]][oo$ix[seq_len(topi)],,drop=FALSE]
}
}
}
############################################################################
# END EXPORT SECTION
############################################################################
############################################################################
# BEGIN PLOTTING SECTION
############################################################################
if (is.function(.progressFun)) {
text <- paste("Plotting...")
.progressFun(detail=text)
}
# Check if we have more than 6 samples in total, pairwise plots are not
# meaningful
if (length(unlist(sampleList)) > 6 && "pairwise" %in% qcPlots) {
warnwrap("Pairwise sample comparison plot becomes indistinguishable ",
"for more than 6 samples! Removing from plots...")
qcPlots <- qcPlots[-which(qcPlots == "pairwise")]
}
if (!is.null(qcPlots)) {
disp("Creating quality control graphs...")
plots <- list(
raw=c("mds","biodetection","countsbio","saturation","readnoise",
"correl","pairwise"),
norm=c("boxplot","gcbias","lengthbias","meandiff","meanvar",
"rnacomp"),
stat=c("deheatmap","volcano","mastat","biodist","deregulogram"),
other=c("filtered"),
venn=c("statvenn","foldvenn")
)
figRaw <- figUnorm <- figNorm <- figStat <- figOther <- figVenn <-
vector("list",length(figFormat))
names(figRaw) <- names(figUnorm) <- names(figNorm) <-
names(figStat) <- names(figOther) <- names(figVenn) <-
figFormat
if (is.null(contrast))
contrastList <- NULL
for (fig in figFormat) {
disp("Plotting in ",fig," format...")
figRaw[[fig]] <- metaseqrPlot(geneCounts,sampleList,
annotation=geneData,plotType=intersect(qcPlots,plots$raw),
isNorm=FALSE,output=fig,path=PROJECT_PATH$qc)
figUnorm[[fig]] <- metaseqrPlot(geneCounts,sampleList,
annotation=geneData,plotType=intersect(qcPlots,plots$norm),
isNorm=FALSE,output=fig,path=PROJECT_PATH$normalization)
# The annotation dimensions change...
if (whenApplyFilter=="prenorm")
figNorm[[fig]] <- metaseqrPlot(normGenes,sampleList,
annotation=geneDataExpr,plotType=intersect(qcPlots,
plots$norm),isNorm=TRUE,output=fig,
path=PROJECT_PATH$normalization)
else if (whenApplyFilter=="postnorm")
figNorm[[fig]] <- metaseqrPlot(normGenes,sampleList,
annotation=geneData,plotType=intersect(qcPlots,
plots$norm),isNorm=TRUE,output=fig,
path=PROJECT_PATH$normalization)
figStat[[fig]] <- metaseqrPlot(normGenesExpr,sampleList,
annotation=geneDataExpr,contrastList=contrastList,
pList=sumpList,thresholds=list(p=pcut,f=1),
plotType=intersect(qcPlots,plots$stat),isNorm=TRUE,
output=fig,path=PROJECT_PATH$statistics)
if (!is.null(geneDataFiltered))
figOther[[fig]] <- metaseqrPlot(geneDataFiltered,
sampleList,annotation=totalGeneData,
plotType=intersect(qcPlots,plots$other),isNorm=FALSE,
output=fig,path=PROJECT_PATH$qc)
else
figOther[[fig]] <- NULL
if ("statvenn" %in% qcPlots)
figVenn[[fig]] <- metaseqrPlot(normGenesExpr,
sampleList,annotation=geneDataExpr,
contrastList=contrastList,pList=cpList,
thresholds=list(p=pcut,f=1),
plotType=intersect(qcPlots,plots$venn),
output=fig,path=PROJECT_PATH$statistics)
}
# Create also tracks if asked, all controls have been performed in the
# beginning, so targets exist
if (createTracks)
tLink <- createSignalTracks(theList,org,stranded=trackInfo$stranded,
normTo=trackInfo$normTo,urlBase=trackInfo$urlBase,
exportPath=trackExportPath,hubInfo=trackInfo$hubInfo,
fasta=trackInfo$fasta,gtf=trackInfo$gtf,
overwrite=overwriteTracks,rc=restrictCores)
else
tLink <- NULL
########################################################################
#assign("sampleList",sampleList,envir=parent.frame())
#assign("geneCounts",geneCounts,envir=parent.frame())
#assign("normGenes",normGenes,envir=parent.frame())
#assign("normGenesExpr",normGenes,envir=parent.frame())
#assign("sumpList",sumpList,envir=parent.frame())
#assign("cpList",cpList,envir=parent.frame())
#assign("contrastList",contrastList,envir=parent.frame())
#assign("geneData",geneData,envir=parent.frame())
#assign("geneDataExpr",geneDataExpr,envir=parent.frame())
########################################################################
}
############################################################################
# END PLOTTING SECTION
############################################################################
############################################################################
# BEGIN REPORTING SECTION
############################################################################
if (report) {
obj <- list(
sampleList=sampleList,
contrastList=contrastList,
qcPlots=qcPlots,
geneCounts=geneCounts,
geneData=geneData,
geneDataExpr=geneDataExpr,
geneDataFiltered=geneDataFiltered,
totalGeneData=totalGeneData,
normGenes=normGenes,
normGenesExpr=normGenesExpr,
cpList=cpList,
sumpList=sumpList,
pcut=pcut,
metaP=metaP,
whenApplyFilter=whenApplyFilter,
PROJECT_PATH=PROJECT_PATH
)
if (reportDb == "sqlite")
.createSqlPlotDb(obj)
else if (reportDb == "dexie")
.createDexiePlotDb(obj)
disp("Creating HTML report...")
if (!is.null(qcPlots)) {
# First create zip archives of the figures
disp("Compressing figures...")
zipfiles <- file.path(PROJECT_PATH$plots,paste("metaseqr_figures_",
figFormat,".zip",sep=""))
names(zipfiles) <- figFormat
for (f in figFormat) {
files <- c(
dir(PROJECT_PATH$qc,pattern=paste(".",f,sep=""),
full.names=TRUE),
dir(PROJECT_PATH$normalization,pattern=paste(".",f,sep=""),
full.names=TRUE),
dir(PROJECT_PATH$statistics,pattern=paste(".",f,sep=""),
full.names=TRUE)
)
zip(zipfiles[f],files)
}
# Then create the final figure variables which brew will find...
figRaw <- figRaw[["png"]]
figUnorm <- figUnorm[["png"]]
figNorm <- figNorm[["png"]]
figStat <- figStat[["png"]]
figOther <- figOther[["png"]]
figVenn <- figVenn[["png"]]
}
else
figRaw <- figUnorm <- figNorm <- figStat <- figOther <-
figVenn <- NULL
# Then see what is going of if default report changed
if (tolower(reportTemplate)=="default") {
if (exists("TEMPLATE")) {
reportTemplate=list(
rmd=file.path(TEMPLATE,"metaseqr2_report.Rmd"),
loader=file.path(TEMPLATE,"dna_loader.gif")
)
}
else
reportTemplate=list(rmd=NULL,loader=NULL)
}
if (!is.null(reportTemplate$rmd)) {
if (file.exists(reportTemplate$rmd)) {
template <- reportTemplate$rmd
hasTemplate <- TRUE
}
else {
warnwrap(paste("The template file",reportTemplate$rmd,
"was not ","found! The HTML report will NOT be generated."))
hasTemplate <- FALSE
}
}
else {
warnwrap(paste("The report option was enabled but no template ",
"file is provided! The HTML report will NOT be generated."))
hasTemplate <- FALSE
}
if (!is.null(reportTemplate$loader)) {
if (file.exists(reportTemplate$loader))
file.copy(from=reportTemplate$loader,to=PROJECT_PATH$media)
else
warnwrap(paste("The report loader image",reportTemplate$loader,
"was not found!"))
}
else
warnwrap(paste("The report loader image was not provided!"))
# Here we must download all required libraries and put them in the js
# folder of the report to make it available offline
if (offlineReport) {
.downloadJsLibs(PROJECT_PATH,reportDb)
paceHeader <- paste0("<script type=\"text/javascript\" ",
"src=\"js/pace.min.js\"></script>")
}
else
paceHeader <- paste0("<script type=\"text/javascript\" ",
"src=\"https://raw.github.com/HubSpot/pace/v1.0.0/",
"pace.min.js\"></script>")
if (hasTemplate) {
execTime <- elap2human(TB)
REPORT_ENV <- .makeReportEnv(environment())
file.copy(file.path(TEMPLATE,"metaseqr2_report.Rmd"),
file.path(PROJECT_PATH$main,"metaseqr2_report.Rmd"),
overwrite=TRUE)
invisible(knitr::knit_meta(class=NULL,clean=TRUE))
render(
input=file.path(PROJECT_PATH$main,"metaseqr2_report.Rmd"),
output_file="index.html",
output_dir=PROJECT_PATH$main,
envir=REPORT_ENV
)
gc(verbose=FALSE)
# Remove the Rmd file after rendering the report
unlink(file.path(PROJECT_PATH$main,"metaseqr2_report.Rmd"))
####################################################################
# Replace a jQuery incompatibility bug line.. Hopefully to remove...
# Also add pace.js in header on the top... header-includes does not
# work...
L <- readLines(file.path(PROJECT_PATH$main,"index.html"))
# pace.js
hi <- grep("<head>",L)
if (length(hi) > 0)
L[hi] <- paste0(L[hi],"\n",paceHeader)
# jQuery bug
ln <- grep(paste("$(\".menu\").find(\"li[data-target=\" +",
"window.page + \"]\").trigger(\"click\");"),L,fixed=TRUE)
if (length(ln) == 1)
L[ln] <- paste("$(\".menu\").find(\"li[data-target='\" +",
"window.page + \"']\").trigger(\"click\");")
cat(L,file=file.path(PROJECT_PATH$main,"index.html"),sep="\n")
####################################################################
}
}
############################################################################
# END REPORTING SECTION
############################################################################
disp("\n",strftime(Sys.time()),": Data processing finished!\n")
execTime <- elap2human(TB)
disp("\n","Total processing time: ",execTime,"\n\n")
if (outList) {
tmp <- c(geneDataExpr,geneDataFiltered)
a <- c(attr(geneDataExpr,"geneLength"),
attr(geneDataFiltered,"geneLength"))
names(a) <- rownames(tmp)
attr(tmp,"geneLength") <- a
if (!is.null(cpList)) {
for (n in names(cpList)) {
if (!is.null(geneDataFiltered)) {
filler <- matrix(NA,length(geneDataFiltered),
ncol(cpList[[n]]))
rownames(filler) <- names(geneDataFiltered)
colnames(filler) <- colnames(cpList[[n]])
}
else {
filler <- NULL
cpList[[n]] <- rbind(cpList[[n]],filler)
cpList[[n]] <- cpList[[n]][rownames(tmp),,drop=FALSE]
}
}
}
if (!is.null(adjCpList)) {
for (n in names(adjCpList)) {
if (!is.null(geneDataFiltered)) {
filler <- matrix(NA,length(geneDataFiltered),
ncol(adjCpList[[n]]))
rownames(filler) <- names(geneDataFiltered)
colnames(filler) <- colnames(cpList[[n]])
}
else
filler <- NULL
adjCpList[[n]] <- rbind(adjCpList[[n]],filler)
adjCpList[[n]] <- adjCpList[[n]][rownames(tmp),,drop=FALSE]
}
}
if (!is.null(sumpList)) {
for (n in names(sumpList)) {
if (!is.null(geneDataFiltered)) {
filler <- rep(NA,length(geneDataFiltered))
names(filler) <- names(geneDataFiltered)
}
else
filler <- NULL
if (is.matrix(sumpList[[n]])) {
# The following if-else was added because some runs had
# filler=NULL while others do not. Sometimes this created a
# problem and the simulations stopped. Now fixed.
if (is.null(filler)) {
sumpList[[n]] <- rbind(sumpList[[n]],filler)
sumpList[[n]] <- sumpList[[n]][rownames(tmp),,
drop=FALSE]
}
else {
filler=as.matrix(filler)
sumpList[[n]] <- rbind(sumpList[[n]],filler)
sumpList[[n]] <- sumpList[[n]][rownames(tmp),,
drop=FALSE]
}
}
else {
sumpList[[n]] <- c(sumpList[[n]],filler)
sumpList[[n]] <- sumpList[[n]][rownames(tmp)]
}
}
}
if (!is.null(adjSumpList)) {
for (n in names(adjSumpList)) {
if (!is.null(geneDataFiltered)) {
filler <- rep(NA,length(geneDataFiltered))
names(filler) <- names(geneDataFiltered)
}
else
filler <- NULL
adjSumpList[[n]] <- c(adjSumpList[[n]],filler)
adjSumpList[[n]] <- adjSumpList[[n]][rownames(tmp)]
}
}
complete <- list(
#call=as.list(match.call()),
params=list(
sampleList=sampleList,
excludeList=excludeList,
fileType=fileType,
path=path,
contrast=contrast,
libsizeList=libsizeList,
embedCols=embedCols,
annotation=annotation,
org=org,
refdb=refdb,
countType=countType,
exonFilters=exonFilters,
geneFilters=geneFilters,
whenApplyFilter=whenApplyFilter,
normalization=normalization,
normArgs=normArgs,
statistics=statistics,
statArgs=statArgs,
adjustMethod=adjustMethod,
metaP=metaP,
weight=weight,
nperm=nperm,
pcut=pcut,
logOffset=logOffset,
preset=preset,
qcPlots=qcPlots,
figFormat=figFormat,
outList=outList,
exportWhere=exportWhere,
exportWhat=exportWhat,
exportScale=exportScale,
exportValues=exportValues,
exportStats=exportStats,
exportCountsTable=exportCountsTable,
restrictCores=restrictCores,
report=report,
reportTop=reportTop,
reportTemplate=reportTemplate,
saveGeneModel=saveGeneModel,
verbose=verbose,
runLog=runLog,
offlineReport=offlineReport,
createTracks=createTracks,
trackExportPath=trackExportPath,
trackInfo=trackInfo
),
filterCutoffs=list(
exonFilter=list(
minActiveExons=NULL
),
geneFilter=list(
length=geneFilters$length$length,
avgReads=if (is.null(geneFilterCutoff)) NULL else
round(geneFilterCutoff$avgReads,digits=5),
expression=list(
median=geneFilterCutoff$expression$median,
mean=geneFilterCutoff$expression$mean,
quantile=geneFilterCutoff$expression$quantile,
known=geneFilterCutoff$expression$known,
custom=geneFilterCutoff$expression$custom
),
biotype=if (is.null(geneFilters$biotype)) NULL else
paste(names(geneFilters$biotype)[which(
unlist(geneFilters$biotype))],collapse=", ")
),
zeroFiltered=length(theZeros),
exonFiltered=length(unique(unlist(exonFilterResult))),
geneFiltered=length(unique(unlist(geneFilterResult))),
totalFiltered=length(theZeros)+length(theDead)
),
geneData=tmp,
rawCounts=rbind(geneCountsExpr,geneCountsUnnormFiltered),
normCounts=rbind(normGenesExpr,geneCountsFiltered),
flags=rbind(goodFlags,allFlags),
sampleList=sampleList,
contrastList=contrast,
pValue=cpList,
fdr=adjCpList,
metaPValue=sumpList,
metaFdr=adjSumpList
)
return(list(data=out,html=html,complete=complete))
}
} # End metaseqr2
constructGeneModel <- function(countData,annoData,type,rc=NULL) {
# countData is a matrix, must have exact rownames as annoData GRanges
if (!all(rownames(countData) == names(annoData)))
stop("The rownames of the provided counts are not the same (or in ",
" the same order) as the provided annotation GRanges!")
# Messaging...
msg <- "provided count regions"
if (!missing(type)) {
if (type == "exon")
msg <- "exons"
else if (type == "utr")
msg <- "transcripts (UTR regions)"
}
# Create splitting factor
theGenes <- factor(annoData$gene_id,levels=unique(annoData$gene_id))
# Split the counts vectors...
theCounts <- cmclapply(colnames(countData),function(n,f,M) {
disp(" Separating ",msg," per gene for ",n,"...")
co <- M[,n]
names(co) <- rownames(M)
return(split(co,f))
},theGenes,countData,rc=rc)
names(theCounts) <- colnames(countData)
# and the width of the merged exons
w <- width(annoData)
if (type == "exon")
names(w) <- annoData$exon_id
else if (type == "utr")
names(w) <- annoData$transcript_id
lengthList <- split(w,theGenes)
attr(theCounts,"lengthList") <- lengthList
# The exon lengths information is essentially redundant in the new version,
# unless we need it for some QC distribution later on...
return(theCounts)
}
.reduceGeneData <- function(exonData,geneData) {
exonChrs <- unique(as.character(seqnames(exonData)))
geneChrs <- unique(as.character(seqnames(geneData)))
if (length(exonChrs) != length(geneChrs)) {
m <- as.integer(match(seqnames(geneData),exonChrs))
geneData <- geneData[which(!is.na(m))]
}
return(geneData)
}
.userOrg <- function(org,db=NULL) {
ua <- getUserAnnotations(db)
if (nrow(ua) == 0)
return(FALSE)
orgs <- unique(ua$organism)
return(org %in% orgs)
}
.userRefdb <- function(refdb,db=NULL) {
us <- getUserAnnotations(db)
if (nrow(us) == 0)
return(FALSE)
refdbs <- unique(us$source)
return(refdb %in% refdbs)
}
.createR2CExport <- function(raw,norm,gr,al,lib,output) {
# Ensure the same order
nameOrder <- names(gr)[order(names(gr))]
# Reorder
raw <- raw[nameOrder,]
norm <- norm[nameOrder,]
gr <- gr[nameOrder]
al <- al[nameOrder]
# Active length and annotation data frame
attr(gr,"geneLength") <- al
ann <- as.data.frame(gr)
ann <- ann[,c(1,2,3,6,7,5,8,9)]
colnames(ann)[1] <- "chromosome"
# Create the list to be exported, naming violation with dot(.) in order
# not to break current SeqCVIBE... Will correct in the future.
b2c.out <- list(
counts=raw,
norm=norm,
annotation=ann,
length=al,
libsize=lib
)
save(b2c.out,file=output,compress=TRUE)
}
.backwardsCompatibility <- function(dataFile) {
the.counts <- count.type <- sample.list <- gene.data <- transcript.data <-
exon.data <- NULL
tmpEnv <- new.env()
load(dataFile,tmpEnv)
if (!is.null(tmpEnv$the.counts)) { # Old file
# Copy to new
tmpEnv$theCounts <- tmpEnv$the.counts
tmpEnv$countsType <- tmpEnv$count.type
tmpEnv$sampleList <- tmpEnv$sampleList
tmpEnv$geneData <- tmpEnv$gene.data
if (!is.null(tmpEnv$transcript.data))
tmpEnv$transcriptData <- tmpEnv$transcript.data
if (!is.null(tmpEnv$exon.data))
tmpEnv$exonData <- tmpEnv$exon.data
# Delete old
if (exists("the.counts",envir=tmpEnv)) {
rm(the.counts,count.type,sample.list,gene.data,envir=tmpEnv)
if (!is.null(tmpEnv$transcript.data))
rm(transcript.data,envir=tmpEnv)
if (!is.null(tmpEnv$exon.data))
rm(exon.data,envir=tmpEnv)
}
}
return(tmpEnv)
}
.excludeSamplesFromList <- function(L,s) {
gl <- attr(L,"lengthList")
L <- L[s]
attr(L,"lengthList") <- gl
return(L)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.