exec/safeQuant.R

#!/usr/local/bin/Rscript

# Author: ahrnee-adm
###############################################################

############################################################### INIT ###############################################################
#### DEPENDANCIES

suppressWarnings(suppressPackageStartupMessages(library(stringr, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library("affy", quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library("limma", quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(gplots, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(seqinr, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(corrplot, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(optparse, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(data.table, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(magrittr, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(ggplot2, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(ggrepel, quiet=T)))
suppressWarnings(suppressPackageStartupMessages(library(dplyr, quiet=T)))

sourceDirOSX <- "~/dev/R/workspace/SafeQuant/R/"
sourceDirTPP <-  "/home/pcfuser/R/SafeQuant/R/"

# first check if dev or tpp mode
if(file.exists(sourceDirOSX) | file.exists(sourceDirTPP)){

	sourceDir <- ifelse(file.exists(sourceDirOSX),sourceDirOSX,sourceDirTPP)

	source(paste(sourceDir,"ExpressionAnalysis.R",sep=""))
	source(paste(sourceDir,"SafeQuantAnalysis.R",sep=""))
	source(paste(sourceDir,"Graphics.R",sep=""))
	source(paste(sourceDir,"GGGraphics.R",sep=""))
	source(paste(sourceDir,"IdentificationAnalysis.R",sep=""))
	source(paste(sourceDir,"Parser.R",sep=""))
	source(paste(sourceDir,"TMT.R",sep=""))
	source(paste(sourceDir,"UserOptions.R",sep=""))

}else if("SafeQuant" %in%  installed.packages()[,1]){ # used installed SafeQuant

	cat("Loading SafeQuant Library \n")
	library("SafeQuant")

}else{
	stop("SafeQuant Package not installed\n")
}

VERSION <- "2.3.4"

### USER CMD LINE OPTIONS
userOptions <- getUserOptions(version=VERSION)
### USER CMD LINE OPTIONS END

if(userOptions$verbose) print(userOptions$proteinQuant)

### SUPRESS WARNINGS
if(!userOptions$verbose){
	options(warn=-1)
}

#### DEPENDENCIES

if(userOptions$verbose) print(userOptions)

############################################################### PARSING ###############################################################

if(userOptions$verbose) cat("PARSING INPUT FILE \n")

# get file type
fileType <- .getFileType(userOptions$inputFile)

### Progenesis Export
if(fileType %in% c("ProgenesisProtein","ProgenesisFeature","ProgenesisPeptide")){

	# default
	expDesign <- getExpDesignProgenesisCsv(userOptions$inputFile)

	# get user specified experimental design
	if(!is.na(userOptions$expDesignTag)){
		# user specified
		expDesign <- expDesignTagToExpDesign(userOptions$expDesignTag,expDesign)
	}

	if(fileType == "ProgenesisProtein"){
		cat("INFO: PARSING PROGENESIS PROTEIN EXPORT FILE ",userOptions$inputFile, "\n" )
		eset <- parseProgenesisProteinCsv(file=userOptions$inputFile,expDesign=expDesign)

	}else if(fileType == "ProgenesisPeptide"){

		#"ProgenesisFeature"
		cat("INFO: PARSING PROGENESIS PEPTIDE EXPORT FILE ",userOptions$inputFile, "\n" )
		eset <- parseProgenesisPeptideMeasurementCsv(file=userOptions$inputFile,expDesign=expDesign, exclusivePeptides=userOptions$FExclusivePeptides)

	}else{ 	#"ProgenesisFeature"

		cat("INFO: PARSING PROGENESIS FEATURE EXPORT FILE ",userOptions$inputFile, "\n" )
		eset <- parseProgenesisFeatureCsv(file=userOptions$inputFile,expDesign=expDesign)
	}

# Scaffold Export (TMT data)
}else if(fileType == "ScaffoldTMT"){
	cat("INFO: PARSING SCAFFOLD RAW EXPORT FILE ",userOptions$inputFile, "\n" )

	# get default experimental design
	# six plex or ten plex ?
	# use default experimental design unless specified by the user
	nbPlex <- .getNbPlex(userOptions$inputFile)
	if(nbPlex == 6){
		# 6-plex default: 1,2,3:4,5,6
		expDesign <- data.frame(condition=paste("Condition",sort(rep(c(1,2),3)),sep=""),isControl=sort(rep(c(T,F),3),decreasing=T) )
	}else{
		# 10-plex default is "1,4,7,10:2,5,8:3,6,9"
		expDesign <- data.frame(condition=paste("Condition",c(1,2,3,1,2,3,1,2,3,1),sep=""),isControl=c(T,F,F,T,F,F,T,F,F,T) )
	}

	eset <- parseScaffoldRawFile(file=userOptions$inputFile,expDesign=expDesign)

	if(userOptions$TAdjustRatios){
		#if((nbPlex == 10)){ # only possible for tmt-10 plex
			nbCalMixSpectra <- sum( (fData(eset)$proteinName %in% names(CALIBMIXRATIOS)))
			if(nbCalMixSpectra < 100) stop("Not enough Calibration Mix spectra were found: ",nbCalMixSpectra, "\n ")
			cat("INFO: FOUND  ", nbCalMixSpectra ," Calibration Mix spectra\n")
			esetCalibMix <- .getCalibMixEset(eset)

			# discard calibration mix proteins
			eset <- eset[!(fData(eset)$proteinName %in% names(CALIBMIXRATIOS)),]

			intAdjObj <- .intensityAdjustment(eset, esetCalibMix)

		#}else{
		#	stop("Ratio Correction Not implemented for TMT 6-plex")
		#}
	}

	# get user specified experimental design
	if(!is.na(userOptions$expDesignTag)){
		# user specified
		expDesign <- expDesignTagToExpDesign(userOptions$expDesignTag,expDesign)
	}

#	eset <- parseScaffoldRawFile(file=userOptions$inputFile,expDesign=expDesign)
	# apply specified experimental design
	eset <- createExpressionDataset(expressionMatrix=exprs(eset)[,rownames(expDesign)],expDesign=expDesign,featureAnnotations=fData(eset))

	if(!is.na(userOptions$scaffoldPTMSpectrumReportFile)){

		cat("INFO: ADDING SCAFFOLD PTM ANNOTATIONS \n")
		eset <- addScaffoldPTMFAnnotations(eset,userOptions$scaffoldPTMSpectrumReportFile)

	}

}else if(fileType == "MaxQuantProteinGroup"){

	# get user specified experimental design
	if(!is.na(userOptions$expDesignTag)){
		# user specified
		expDesign <- expDesignTagToExpDesign(userOptions$expDesignTag,data.frame(condition=paste("Condition",1:1000),isControl=c(F,1000)) )
	}else{
		stop("Please Specify Experimental Design")
	}
	eset <- parseMaxQuantProteinGroupTxt(userOptions$inputFile,expDesign=expDesign, method="auc")

}else if(fileType == "GenericCSV"){

}else{
	stop("Unknown File Type", userOptions$inputFile)
}

# test option, limit number of entries
if(userOptions$test){
	eset <- eset[1:nrow(eset) %in% sample(1:min(300,c(nrow(eset))),replace=F),]
}

# parse .fasta file
if(!is.na(userOptions$proteinFastaFile)){
	cat("INFO: PARSING PROTEIN SEQUENCE DB ",userOptions$proteinFastaFile, "\n" )
	### read protein db
	proteinDB <- read.fasta(userOptions$proteinFastaFile,seqtype = "AA",as.string = TRUE, set.attributes = FALSE)

	# dirty fix check if ACs in Progenesis file are stripped
	if(isStrippedACs(sample(fData(eset)$proteinName,100))){
		cat("INFO: RE-FORMATTING ACCESSION NUMBERS\n")
		names(proteinDB) <- stripACs(names(proteinDB))
	}
}

############################################################### CREATE DATA MODEL ###############################################################

if(userOptions$verbose) print(eset)
if(userOptions$verbose) print(pData(eset))
if(userOptions$verbose) print(names(fData(eset)))

#### CREATE FEATURE DATA AND FILTER (pre-rollup)

# generic
filter <- data.frame(
		con=isCon(fData(eset)$proteinName)	# contaminants
		,ac = !(grepl(userOptions$selectedProteinName,fData(eset)$proteinName,ignore.case=T)) # protein ac
)

# do not filter TMT data
if("pMassError" %in% names(fData(eset))  &&  (fileType != "ScaffoldTMT") ){
	### applicable to Progenesis feature Exports

	if(is.na(userOptions$precursorMassFilter)){ # if not user specified
		# automatically get precursor limits, X * sd of 50% top scoring
		userOptions$precursorMassFilter <- getMeanCenteredRange(fData(eset)$pMassError[fData(eset)$idScore > quantile(fData(eset)$idScore,na.rm=T)[3]],nbSd = 3)
		filter <- cbind(filter, pMassError=
						(fData(eset)$pMassError < userOptions$precursorMassFilter[1])
						| (fData(eset)$pMassError > userOptions$precursorMassFilter[2]) # precursor mass tolerance
		)
	}
}

if("ptm" %in% names(fData(eset))){

	# add motif-X and ptm coordinates
	if(exists("proteinDB")){

		cat("INFO: EXTRACTING PTM COORDINATES AND MOTIFS\n")
		#format 1) progensis  2) scaffold
		eset <- .addPTMCoord(eset,proteinDB,motifLength=6, isProgressBar=T,format= (fileType == "ScaffoldTMT") +1)

	}
	filter <- cbind(filter
			, ptm = !(grepl(userOptions$selectedModifName,as.character(fData(eset)$ptm),ignore.case=T))
			, nbPtmsPerPeptide = (fData(eset)$nbPtmsPerPeptide > userOptions$maxNbPtmsPerPeptide) )

}

if("peptide" %in% names(fData(eset))){
	filter <- cbind(filter
			, peptideLength =nchar(as.character(fData(eset)$peptide)) < userOptions$minPeptideLength
			, charge =  fData(eset)$charge == 1 # discard singly charged
	)
}

if(!("nbPeptides" %in% names(fData(eset)))){
	### set nb peptides per protein
	eset <- setNbPeptidesPerProtein(eset)
}

filter <- cbind(filter,nbPeptides=(fData(eset)$nbPeptides < userOptions$minNbPeptidesPerProt))

# do not filter TMT data
#if(("idScore" %in% names(fData(eset))) && (fileType != "ScaffoldTMT")){
if(("idScore" %in% names(fData(eset)))){
	eset <- addIdQvalues(eset)
	filter <- cbind(filter,qvalue=fData(eset)$idQValue > userOptions$fdrCutoff)
}

# set pre-rollup filters
eset <- .setFilter(eset,filter=filter)

### make sure at least 1 feature pass the filter
if(sum(!fData(eset)$isFiltered,na.rm=T) == 0){
	stop("CHECK FILTER SETTINGS. ALL FEATURES WERE FILTERED OUT")
}

#### CREATE FEATURE DATA AND FILTER END

### SET ANCHOR PROTEINS
fData(eset)$isNormAnchor <- grepl(userOptions$normAC,fData(eset)$proteinName)

if(userOptions$verbose){
	cat("\nNB. ANCHOR PROTEINS: ")
	cat(sum(fData(eset)$isNormAnchor))
	cat("\n")
	print(fData(eset)$proteinName[fData(eset)$isNormAnchor])
	cat("\n")
}

### SET ANCHOR PROTEINS END

############################################################### EXPRESSION ANALYSIS ###############################################################

# create paired experiemntal design()
if(userOptions$ECorrelatedSamples){
	eset <- createPairedExpDesign(eset)
}

# add filters etc to adjusted expressionSet
# update expDesign of intAdjObj$esetAd
if(exists("intAdjObj")){
	fData(intAdjObj$esetAdj) <- fData(eset)
	pData(intAdjObj$esetAdj) <- pData(eset)
	exprs(intAdjObj$esetAdj) <- exprs(intAdjObj$esetAdj)[,colnames(exprs(eset))]
}

### non-pairwise stat test
statMethod <- c("")
if(userOptions$SNonPairWiseStatTest) statMethod <- c("all") #@TODO what about 'naRep'
if(userOptions$SRawDataAnalysis){ # No Normalization
	esetNorm <- eset
	if(exists("intAdjObj")) intAdjObj$esetAdjNorm <- intAdjObj$esetAdj
}else{ # NORMALIZE
	method <- c("global","median")
	# norm based on sum if norm anchor is specified
	if(sum(fData(eset)$isNormAnchor) < nrow(eset)) method <- c("global","sum")
	esetNorm <- sqNormalize(eset, method=method)
	if(exists("intAdjObj")) intAdjObj$esetAdjNorm <- sqNormalize(intAdjObj$esetAdj, method=method )
}

### MISSING VALUES IMPUTATION
# baselineIntensity <- getBaselineIntensity(as.vector(unlist(exprs(esetNorm)[,1])),promille=5)
# exprs(esetNorm)[  is.na(exprs(esetNorm)) | (exprs(esetNorm) <= 0)  ] <- 0
# exprs(esetNorm) <- exprs(esetNorm) + baselineIntensity
# if(exists("intAdjObj")){
# 	baselineIntensity <- getBaselineIntensity(as.vector(unlist(exprs(intAdjObj$esetAdjNorm)[,1])),promille=5)
# 	exprs(intAdjObj$esetAdjNorm)[  is.na(exprs(intAdjObj$esetAdjNorm)) | (exprs(intAdjObj$esetAdjNorm) <= 0)  ] <- 0
# 	exprs(intAdjObj$esetAdjNorm) <- exprs(intAdjObj$esetAdjNorm) + baselineIntensity
# }

esetNorm = sqImpute(esetNorm,method=userOptions$SMissingValuesImutationMethod)
if(exists("intAdjObj")){
   intAdjObj$esetAdjNorm = sqImpute(intAdjObj$esetAdjNorm,method=userOptions$SMissingValuesImutationMethod )
}

if((fileType == "ProgenesisProtein") |  (fileType == "MaxQuantProteinGroup")){

	fData(esetNorm)$isFiltered <- fData(esetNorm)$isFiltered  | isDecoy(fData(esetNorm)$proteinName)
	sqaProtein <- safeQuantAnalysis(esetNorm, method=statMethod, fcThrs=userOptions$ratioCutOff)
}else if((fileType == "ScaffoldTMT") && is.na(userOptions$scaffoldPTMSpectrumReportFile)){

	# roll-up protein level
	cat("INFO: ROLL-UP PROTEIN LEVEL\n")
	fData(esetNorm)$isFiltered <- fData(esetNorm)$isFiltered |  isDecoy(fData(esetNorm)$proteinName)

	# correct TMT ratios
	if(userOptions$TAdjustRatios){
		fData(intAdjObj$esetAdjNorm)$isFiltered <- fData(esetNorm)$isFiltered
		intAdjObjProt <- intAdjObj
		intAdjObjProt$esetAdjNorm <- rollUp(intAdjObj$esetAdjNorm,featureDataColumnName= c("proteinName"))
		sqaProtein <- safeQuantAnalysis(rollUp(esetNorm,featureDataColumnName= c("proteinName")), method=statMethod,intensityAdjustmentObj=intAdjObjProt, fcThrs=userOptions$ratioCutOff )
	}else{
		sqaProtein <- safeQuantAnalysis(rollUp(esetNorm,featureDataColumnName= c("proteinName")), method=statMethod , fcThrs=userOptions$ratioCutOff)
	}

	fData(sqaProtein$eset)$isFiltered <- fData(sqaProtein$eset)$isFiltered | isDecoy(fData(sqaProtein$eset)$proteinName) | (fData(sqaProtein$eset)$nbPeptides <  userOptions$minNbPeptidesPerProt)

}else{

	# roll-up peptide level
	cat("INFO: ROLL-UP PEPTIDE LEVEL\n")

	# correct TMT ratios
	if(userOptions$TAdjustRatios){
		cat("WARN: Ratio correction not yet implemented in this anlysis mode \n")
	}

	esetPeptide <- rollUp(esetNorm,featureDataColumnName= c("peptide","ptm"))

	# fdr filter
	# replace qValues by rollUp level qValues ()
	esetPeptide <- addIdQvalues(esetPeptide)

	if(fileType == "ScaffoldTMT"){
		fData(esetPeptide)$isFiltered <- fData(esetPeptide)$isFiltered | (fData(esetPeptide)$nbPeptides <  userOptions$minNbPeptidesPerProt)
	}else{
		# update filter to exclude peptide level hight qValues
		fData(esetPeptide)$isFiltered <- fData(esetPeptide)$isFiltered | (fData(esetPeptide)$idQValue > userOptions$fdrCutoff) | (fData(esetPeptide)$nbPeptides <  userOptions$minNbPeptidesPerProt)
	}

	if(userOptions$proteinQuant){
		cat("INFO: ROLL-UP PROTEIN LEVEL\n")
		esetProtein <- rollUp(esetPeptide,featureDataColumnName= c("proteinName"))
		esetProtein <- addIdQvalues(esetProtein)

		if(fileType == "ScaffoldTMT"){
			fData(esetProtein)$isFiltered <- fData(esetProtein)$isFiltered | isDecoy(fData(esetProtein)$proteinName) | (fData(esetProtein)$nbPeptides <  userOptions$minNbPeptidesPerProt)
		}else{
			fData(esetProtein)$isFiltered <- fData(esetProtein)$isFiltered | (fData(esetProtein)$idQValue > userOptions$fdrCutoff) | isDecoy(fData(esetProtein)$proteinName) | (fData(esetProtein)$nbPeptides <  userOptions$minNbPeptidesPerProt)
		}
		sqaProtein <- safeQuantAnalysis(esetProtein, method=statMethod, fcThrs=userOptions$ratioCutOff)

	}

	fData(esetPeptide)$isFiltered <- fData(esetPeptide)$isFiltered | isDecoy(fData(esetPeptide)$proteinName)
	sqaPeptide <- safeQuantAnalysis(esetPeptide, method=statMethod, fcThrs=userOptions$ratioCutOff)

	fData(esetNorm)$isFiltered <- fData(esetNorm)$isFiltered | isDecoy(fData(esetNorm)$proteinName) | (fData(esetNorm)$nbPeptides <  userOptions$minNbPeptidesPerProt)

	if(userOptions$top3 & userOptions$proteinQuant){
		cat("INFO: ROLL-UP TOP3\n")
		esetTop3 <-  rollUp(esetPeptide,featureDataColumnName= c("proteinName"), method="top3")
	}
}

### IBAQ
if(userOptions$iBAQ & userOptions$proteinQuant){
	cat("INFO: CALCULATING IBAQ VALUES\n")
	if(exists("proteinDB")){
		esetIBAQ <-  getIBAQEset(sqaProtein$eset, proteinDB=proteinDB)
	}else{
		cat("ERROR: proteinDB NOT FOUND NO iBAQ VALUES CALCULATED\n")
	}
}

### EXPRESSION ANALYSIS END

############################################################### EXPORTS ###############################################################
cat("INFO: PREPARING EXPORTS","\n")

#### SET WORKING DIR

if(!file.exists(userOptions$outputDir)) dir.create(userOptions$outputDir)
if(userOptions$verbose) cat("INFO: CREATED DIRECTORY",  userOptions$outputDir,"\n")
#### SET WORKING DIR

##I/O: set export file paths
userOptions$pdfFilePath <- file.path(userOptions$outputDir, paste(userOptions$resultsFileLabel,".pdf",sep=""))
userOptions$peptideReportFilePath <- file.path(userOptions$outputDir, paste0(userOptions$resultsFileLabel,"_PEPTIDE.",userOptions$sSheetExtension))
userOptions$proteinReportFilePath <- file.path(userOptions$outputDir, paste0(userOptions$resultsFileLabel,"_PROTEIN.",userOptions$sSheetExtension))
userOptions$paramsFilePath <- file.path(userOptions$outputDir, paste(userOptions$resultsFileLabel,"_SQ_PARAMS.TXT",sep=""))
userOptions$rDataFilePath <- file.path(userOptions$outputDir, paste(userOptions$resultsFileLabel,"_SQ.rData",sep=""))

############################################################### GRAPHICS ###############################################################

# plot protein or peptide level results
if(exists("sqaProtein")){
	sqaDisp <- sqaProtein
	lab <- "Protein"
}else{
	sqaDisp <- sqaPeptide
	lab <- "Peptide"
}
### only disp. a subset for some plots
rowSelEset <- 1:nrow(eset) %in% sample(nrow(eset),min(c(2000,nrow(eset))) ,replace=F)
rowSelSqaDisp <- 1:nrow(sqaDisp$eset) %in% sample(nrow(sqaDisp$eset),min(c(2000,nrow(sqaDisp$eset))) ,replace=F)

pdf(userOptions$pdfFile)
parDefault <- par()
CONDITIONCOLORS <- .getConditionColors(esetNorm)

### EXPDESIGN PLOT
plotExpDesign(esetNorm, version=VERSION)
### EXPDESIGN PLOT END

### IDENTIFICATION PLOTS
if(userOptions$verbose) cat("INFO: IDENTIFICATION PLOTS \n")
par(mfrow=c(2,2))

#.idOverviewPlots()
#@ NOT CRAN COMPATIBLE
.idOverviewPlots(userOptions=userOptions
		,esetNorm=esetNorm
		,fileType=fileType
		,sqaPeptide= ifelse(exists("sqaPeptide"),list(sqaPeptide),list(NA))[[1]]# HACK to pass check
		,sqaProtein= ifelse(exists("sqaProtein"),list(sqaProtein),list(NA))[[1]] # HACK to pass check
)

if(fileType %in% c("ProgenesisFeature","ProgenesisPeptide")){
	par(mfrow=c(3,2))
	.idPlots(eset, selection=c(1,3), main="Feature Level", qvalueThrs=userOptions$fdrCutoff, userOptions=userOptions)
	if(exists("sqaPeptide")) .idPlots(sqaPeptide$eset, selection=c(1,3), main="Peptide Level", qvalueThrs=userOptions$fdrCutoff)
	if(exists("sqaProtein")) .idPlots(sqaProtein$eset, selection=c(1,3), main="Protein Level", qvalueThrs=userOptions$fdrCutoff)
}
par(parDefault)
### IDENTIFICATIONS PLOTS END
### QUANT. QC PLOTS
if(userOptions$verbose) cat("INFO: QUANT QC. PLOTS \n")

### MASS ERROR
par(parDefault)
#if("pMassError" %in% names(fData(eset))){
if(fileType %in% c("ProgenesisFeature","ProgenesisPeptide")){
	par(mfrow=c(2,1), mar=c(4.5,6.1,4.1,6.1))
	plotPrecMassErrorDistrib(eset, pMassTolWindow=userOptions$precursorMassFilter)

	plotPrecMassErrorVsScore(eset[rowSelEset,], pMassTolWindow=userOptions$precursorMassFilter)
	par(parDefault)
}

layout(rbind(c(1,2), c(3,3)))
### missing values
missinValueBarplot(eset)

### total intensity sum
barplotMSSignal(eset)
par( mar=c(6.5,5.1,2.5,3.1))
cvBoxplot(sqaDisp$eset)

par(parDefault)

### CORRELATION PLOTS
### COR OR PAIRS PLOT. IF FEWER THAN X SAMPLES

if(ncol(sqaDisp$eset) < 8){
	pairsAnnot(log10(exprs(sqaDisp$eset))[rowSelSqaDisp & !fData(sqaDisp$eset)$isFiltered ,],textCol=as.character(CONDITIONCOLORS[pData(sqaDisp$eset)$condition,]))
}else{
	.correlationPlot(log10(exprs(sqaDisp$eset))[rowSelSqaDisp & !fData(sqaDisp$eset)$isFiltered,], labels=as.character(unique(pData(sqaDisp$eset)$condition)), textCol=as.character(CONDITIONCOLORS[pData(sqaDisp$eset)$condition,]))
}
### COR OR PAIRS PLOT. IF FEWER THAN X CONDITIONS
if(length(unique(pData(sqaDisp$eset)$condition)) < 8){
	pairsAnnot(log10(getSignalPerCondition(sqaDisp$eset[rowSelSqaDisp & !fData(sqaDisp$eset)$isFiltered,]))[,as.character(unique(pData(sqaDisp$eset)$condition)) ],textCol=as.character(CONDITIONCOLORS[as.character(unique(pData(sqaDisp$eset)$condition)),]))
}else{
	.correlationPlot(log10(getSignalPerCondition(sqaDisp$eset[rowSelSqaDisp & !fData(sqaDisp$eset)$isFiltered,]))[,as.character(unique(pData(sqaDisp$eset)$condition)) ],textCol=as.character(CONDITIONCOLORS[as.character(unique(pData(sqaDisp$eset)$condition)),]))
}

par(parDefault)

### TMT calibration mix
if(exists("intAdjObj")){

	# plot adjusted ratios vs org ratio
	# boxplot noise fraction
	#if(ncol(sqaDisp$ratio) > 1) par(mfrow=c(2,2))
	boxplot(intAdjObj$noiseFraction*100, border=ifelse(intAdjObj$selectedPairs,"blue","black")
			, ylab="Noise Fraction (%)",xlab="Calibration Mix Pair", cex.axis=1.5,cex.lab=1.5)

	if(ncol(sqaDisp$ratio) > 1) par(mfrow=c(2,2))
	plotAdjustedVsNonAdjustedRatio(sqaDisp$ratio,sqaDisp$unAdjustedRatio)
	par(parDefault)

}

### QUANT. QC PLOTS END

par(parDefault)
if(userOptions$verbose) cat("INFO: HEAT MAP \n")
#hClustHeatMapOld(sqaDisp$eset[(1:nrow(sqaDisp$eset) %in% sample(nrow(sqaDisp$eset),min(c(nrow(sqaDisp$eset),10000)))) &  !fData(sqaDisp$eset)$isFiltered,],main= paste(lab,"Level"))

# set smaller range if TMT
breaks=seq(-2,2,length=20)
if(fileType == "ScaffoldTMT" ){
  breaks=seq(-1.5,1.5,length=20)
}
hClustHeatMap(sqaDisp$eset[(1:nrow(sqaDisp$eset) %in% sample(nrow(sqaDisp$eset),min(c(nrow(sqaDisp$eset),10000)))) &  !fData(sqaDisp$eset)$isFiltered,]
              ,main= paste(lab,"Level")
              ,breaks = breaks)

### QUANT. STAT. PLOTS

### VAILD FEATURES VS. pValue/qValue
if(userOptions$verbose) cat("INFO: QUANT RES. PLOTS \n")

par(mfrow=c(1,2))
plotNbValidDeFeaturesPerFDR(sqaDisp,
		upRegulated=F
		,log2RatioCufOff=log2(userOptions$ratioCutOff)
		,pvalRange=c(0,0.15)
		,pvalCutOff=userOptions$deFdrCutoff
		,isLegend=T
		,isAdjusted=T
		,ylab=paste(lab, "Counts")
		,main="DOWN REGULATION"
)

plotNbValidDeFeaturesPerFDR(sqaDisp,
		upRegulated=T
		,log2RatioCufOff=log2(userOptions$ratioCutOff)
		,pvalRange=c(0,0.15)
		,pvalCutOff=userOptions$deFdrCutoff
		,isLegend=F
		,isAdjusted=T
		,ylab=paste(lab, "Counts")
		,main="UP REGULATION"
)

par(parDefault)

# plotVolcano(sqaDisp
# 		, main=paste(lab,"Level")
# 		, ratioThrs= userOptions$ratioCutOff
# 		, pValueThreshold= userOptions$deFdrCutoff
# 		, adjusted = T)

plotAllGGVolcanoes(sqaDisp
                   ,log2RatioThrs =userOptions$ratioCutOff %>% log2
                   ,pValueThrs= userOptions$deFdrCutoff
                   ,ylab = "log10 Adj. P-Value"
                   ,title = paste(lab,"Level")
                   ,textSize = 15
)

if(userOptions$eBayes){

	par(mfrow=c(1,2))
	plotNbValidDeFeaturesPerFDR(sqaDisp,
			upRegulated=F
			,log2RatioCufOff=log2(userOptions$ratioCutOff)
			,pvalRange=c(0,0.15)
			,pvalCutOff=userOptions$deFdrCutoff
			,isLegend=T
			,isAdjusted=F
			,ylab=paste(lab, "Counts")
			,main="DOWN REGULATION"
	)

	plotNbValidDeFeaturesPerFDR(sqaDisp,
			upRegulated=T
			,log2RatioCufOff=log2(userOptions$ratioCutOff)
			,pvalRange=c(0,0.15)
			,pvalCutOff=userOptions$deFdrCutoff
			,isLegend=F
			,isAdjusted=F
			,ylab=paste(lab, "Counts")
			,main="UP REGULATION"
	)
	par(parDefault)

	# plotVolcano(sqaDisp
	# 		, main=paste(lab,"Level")
	# 		, ratioThrs= userOptions$ratioCutOff
	# 		, pValueThreshold= userOptions$deFdrCutoff
	# 		, adjusted = F)

	plotAllGGVolcanoes(sqaDisp
	                   ,log2RatioThrs =userOptions$ratioCutOff %>% log2
	                   ,pValueThrs= userOptions$deFdrCutoff
	                   ,ylab = "log10 P-Value"
	                   ,title = paste(lab,"Level")
	                   ,textSize = 15
	                   ,isAdjusted = F
	)

	par(mfrow=c(2,2))
	if(nrow(CONDITIONCOLORS) > 4) par(mfrow=c(3,3))
	.allpValueHist(sqaDisp)
	plotQValueVsPValue(sqaDisp, lim=c(0,1))
	par(parDefault)
}

### QUANT. STAT. PLOTS END

par(parDefault)


### SOME ADDITIONAL QC PLOTS

if(userOptions$addQC){

#	if(exists("sqaPeptide")){
#		plotXYDensity(fData(sqaPeptide$eset)$retentionTime,fData(sqaPeptide$eset)$pMassError, disp=c("")
#			, xlab="Retention time (min)"
#			, ylab="Precursor Mass Error (ppm)"
#			, cex.axis=1.5
#			, cex.lab=1.5)

	if( all(c("retentionTime","pMassError")  %in% names(fData(eset)) )){
		plotXYDensity(fData(eset)$retentionTime,fData(eset)$pMassError, disp=c("")
			, xlab="Retention time (min)"
			, ylab="Precursor Mass Error (ppm)"
			, cex.axis=1.5
			, cex.lab=1.5)

		abline(h=c(userOptions$precursorMassFilter[1],0,userOptions$precursorMassFilter[2]),lty=2, lwd=2)

		# rt vs signal
		sel <- 1:nrow(esetNorm) %in% sample(nrow(esetNorm),min(c(4000,nrow(esetNorm))) ,replace=F) & (!(fData(esetNorm)$proteinName %in% names(CALIBMIXRATIOS)))
		plotRTNormSummary(esetNorm[sel,])

		par(mfrow=c(2,2))
		plotRTNorm(getRTNormFactors(esetNorm[sel,], minFeaturesPerBin=100),esetNorm[sel,])

		par(parDefault)

	}

	par(mfrow=c(2,2))
	#all ma plots
	for(s in colnames(exprs(esetNorm))){
		sel <- 1:nrow(esetNorm) %in% sample(nrow(esetNorm),min(c(4000,nrow(esetNorm))) ,replace=F) & (!(fData(esetNorm)$proteinName %in% names(CALIBMIXRATIOS)))
		maPlotSQ(esetNorm[sel,],sample=s)
	}

	par(parDefault)

}

cat("INFO: CREATED FILE ", userOptions$pdfFile,"\n")

graphics.off()

############################### GRAPHICS END

############################################################### SPREADSHEET EXPORT ###############################################################

if(exists("sqaPeptide")){

	selFDataCol <- c("peptide","proteinName", "ac","geneName", "proteinDescription", "idScore","idQValue"
			,"retentionTime",	"ptm", "nbPtmsPerPeptide",	"nbRolledFeatures" )
	selFDataCol <-	selFDataCol[selFDataCol %in% names(fData(sqaPeptide$eset))]

	### add modif coord
	if("motifX" %in% names(fData(sqaPeptide$eset))){
		selFDataCol <- c(selFDataCol,"motifX","modifCoord")
	}

	### add allAccessions
	if("allAccessions" %in% names(fData(sqaPeptide$eset))){
		selFDataCol <- c(selFDataCol,"allAccessions")
	}

	### add ptmPeptide
	if("ptmPeptide" %in% names(fData(sqaPeptide$eset))){
		selFDataCol <- c(selFDataCol,"ptmPeptide")
	}

	### add ptmLocProb
	if("ptmLocProb" %in% names(fData(sqaPeptide$eset))){
		selFDataCol <- c(selFDataCol,"ptmLocProb")
	}

	### add ptmLocMascotConfidence
	if("ptmLocMascotConfidence" %in% names(fData(sqaPeptide$eset))){
		selFDataCol <- c(selFDataCol,"ptmLocMascotConfidence")
	}

	# add sqa object data
	cv <- sqaPeptide$cv
	names(cv) <- paste("cv",names(cv),sep="_")
	ratio <- sqaPeptide$ratio
	if(ncol(ratio) > 0 ) names(ratio) <- paste("log2ratio",names(ratio),sep="_")
	pValue <- sqaPeptide$pValue
	if(ncol(pValue) > 0 ) names(pValue) <- paste("pValue",names(pValue),sep="_")
	qValue <- sqaPeptide$qValue
	if(ncol(qValue) > 0 )  names(qValue) <- paste("qValue",names(qValue),sep="_")

	medianSignalDf <- getSignalPerCondition(sqaPeptide$eset)
	names(medianSignalDf) <- paste("medianInt",names(medianSignalDf),sep="_")

	out <- cbind(
			fData(sqaPeptide$eset)[,selFDataCol]
			, exprs(sqaPeptide$eset)
			, medianSignalDf
			, cv
			, ratio
			, fracNAFeatures = getNAFraction(sqaPeptide$eset,method=c("cond","count"))
			, pValue
			, qValue
			, FTestPValue = sqaPeptide$FPValue
			, FTestQValue = sqaPeptide$FQValue
			)[!fData(sqaPeptide$eset)$isFiltered,]

	### add unadjusted ratios if TMT ratio correction
	if(userOptions$TAdjustRatios){
		unadjPeptideRatios <- sqaPeptide$unAdjustedRatio[!fData(sqaPeptide$eset)$isFiltered,]
		names(unadjPeptideRatios) <- paste("log2_unadjRatio",names(sqaPeptide$ratio),sep="_")
		out <- cbind(out,unadjPeptideRatios)
	}

	### paired expDesign ratio export
	if("subject" %in% names(pData(sqaPeptide$eset))){
		allRatios <- getRatios(sqaPeptide$eset,method="paired")[!fData(sqaPeptide$eset)$isFiltered,]
		names(allRatios) <- paste("log2_pairedRatio",names(allRatios),sep="_")
		out <- cbind(out,allRatios)
	}

	write.table(out
			, file=userOptions$peptideReportFilePath
			, sep=userOptions$sSheetExportDelimiter
			, row.names=F
	)

	cat("INFO: CREATED FILE ", userOptions$peptideReportFilePath,"\n")
}

if(exists("sqaProtein")){

	selFDataCol <- c("proteinName","ac","geneName","proteinDescription","idScore","idQValue","nbPeptides")
	selFDataCol <- selFDataCol[selFDataCol %in%  names(fData(sqaProtein$eset))]

	### add allAccessions
	if("allAccessions" %in% names(fData(sqaProtein$eset))){
		selFDataCol <- c(selFDataCol,"allAccessions")
	}

	# add sqa object data
	cv <- sqaProtein$cv
	names(cv) <- paste("cv",names(cv),sep="_")
	ratio <- sqaProtein$ratio
	if(ncol(ratio) > 0 ) names(ratio) <- paste("log2ratio",names(ratio),sep="_")
	pValue <- sqaProtein$pValue
	if(ncol(pValue) > 0 ) names(pValue) <- paste("pValue",names(pValue),sep="_")
	qValue <- sqaProtein$qValue
	if(ncol(qValue) > 0 ) names(qValue) <- paste("qValue",names(qValue),sep="_")

	medianSignalDf <- getSignalPerCondition(sqaProtein$eset)
	names(medianSignalDf) <- paste("medianInt",names(medianSignalDf),sep="_")

	out <- cbind(
			fData(sqaProtein$eset)[,selFDataCol]
			, exprs(sqaProtein$eset)
			, medianSignalDf
			, cv
			, ratio
			, fracNAFeatures = getNAFraction(sqaProtein$eset,method=c("cond","count"))
			, pValue
			, qValue
			, FTestPValue = sqaProtein$FPValue
			, FTestQValue = sqaProtein$FQValue
			)[!fData(sqaProtein$eset)$isFiltered,]

	# add median top3
	if(exists("esetTop3")){

		# medians
#		tmpOut <- getSignalPerCondition(esetTop3)
#		tmpOut <- tmpOut[match(rownames(out),rownames(tmpOut)), ]
#		names(tmpOut) <- paste("medianInt_top3",names(tmpOut),sep="_")

		tmpOut <- exprs(esetTop3)
		tmpOut <- tmpOut[match(rownames(out),rownames(tmpOut)), ]
		colnames(tmpOut) <- paste("top3",colnames(tmpOut),sep="_")

		out <- cbind(out,tmpOut)
	}

	# add iBAQ
	if(exists("esetIBAQ")){

		# medians
#		tmpOut <- getSignalPerCondition(esetIBAQ)
#		tmpOut <- tmpOut[match(rownames(out),rownames(tmpOut)), ]
#		names(tmpOut) <- paste("medianInt_top3",names(tmpOut),sep="_")

		tmpOut <- exprs(esetIBAQ)
		tmpOut <- tmpOut[match(rownames(out),rownames(tmpOut)), ]
		colnames(tmpOut) <- paste("iBAQ",colnames(tmpOut),sep="_")

		out <- cbind(out,tmpOut)

	}

	### add unadjusted ratios if TMT ratio correction
	if(userOptions$TAdjustRatios){
		unadjProteinRatios <- sqaProtein$unAdjustedRatio[!fData(sqaProtein$eset)$isFiltered,]
		names(unadjProteinRatios) <- paste("log2_unadjRatio",names(sqaProtein$ratio),sep="_")
		out <- cbind(out,unadjProteinRatios)
	}


	### paired expDesign ratio export
	if("subject" %in% names(pData(sqaProtein$eset))){
		allRatios <- getRatios(sqaProtein$eset,method="paired")[!fData(sqaProtein$eset)$isFiltered,]
		names(allRatios) <- paste("log2_pairedRatio",names(allRatios),sep="_")
		out <- cbind(out,allRatios)
	}

	write.table(out
			, file=userOptions$proteinReportFilePath
			, sep=userOptions$sSheetExportDelimiter
			, row.names=F
	)

	cat("INFO: CREATED FILE ", userOptions$proteinReportFilePath,"\n")
}

### SPREADSHEET EXPORT END

############################################################### PARAMS EXPORT ###############################################################
write.table(data.frame(
				param=row.names(data.frame(unlist(userOptions[names(userOptions)])))
				,value=as.vector((unlist(userOptions[names(userOptions)])))
		)
		,file=userOptions$paramsFilePath
		,sep="\t"
		,row.names=F
		,quote=F
)

cat("INFO: CREATED FILE ", userOptions$paramsFilePath,"\n")

### EXPORT PARAMS

############################################################### RDATA EXPORT ###############################################################

if(userOptions$isSaveRObject){
	save.image(file=userOptions$rDataFilePath)
	cat("INFO: CREATED FILE ", userOptions$rDataFilePath,"\n")
}
### EXPORT RDATA END
eahrne/SafeQuant documentation built on April 8, 2021, 10:10 a.m.