R/UserOptions.R

Defines functions expDesignTagToExpDesign getUserOptions

Documented in expDesignTagToExpDesign getUserOptions

# TODO: Add comment
#
# Author: erikahrne
###############################################################################



### CMD OPTIONS
#' Command Line Option List
#' @export
option_list <- list(


### I/O
		make_option(c("-i", "--inputFile"), type="character", default="",
				help="I/O:  Input file: Progenesis (Feature,Protein or Peptide) .csv,
			or Scaffold Q+ (Raw Export, for TMT quant) .xls (REQUIRED)",
		),
		make_option(c("-o", "--outputDir"), type="character", default=NA,
				help="I/O:  Results Output Directory [default FOLDER OF INPUTFILE]",
		),

		make_option(c("-l", "--resultsFileLabel"), type="character", default="SQ_Results",
				help="I/O: results file directory [default %default]",
		),

		make_option(c("-f", "--fastaFile"), type="character", default="",
				help="I/O:  Protein DB .fasta file [default ./]",
		),

		make_option(c("-p", "--scaffoldPTMSpectrumReportFile"), type="character", default="",
				help="I/O:  Scaffold PTM Spectrum Report File [default ./]",
		),

		make_option(c("-d","--spreadsheetExportDelimiter"), type="integer", default=1,
		            help="I/O: Spreadsheet Export Delimiter 1) <tab> 2) <,> [default %default]",
    ),

### I/O END

# FILTER (--F)
		make_option(c("--FProteinAccessionSelection"), type="character", default=".",
				help="FILTER: --FP Filter features by Accession Regular Expression [default %default] (all features kept)",
				metavar="Protein Accession Reg. expr."),

		#### peptide analysis specfic
		make_option(c("--FModificationSelection"), type="character", default="",
				help="FILTER (LFQ PEP ONLY): --FM Only keep Peptides with modifications matching Regular Expression [default %default]
				 (all features kept).",
				metavar="modification name Reg. expr."),

		make_option(c("--FFdrCutoff"), type="double", default=0.01,
				help="FILTER (LFQ ONLY): --FF Identification level False Discovery Rate Cutoff.  [0-1] [default %default]",
				metavar="Peptide/Protein FDR cutoff"),

#		make_option(c("--FCoefficientOfVarianceMax"), type="double", default=Inf,
#				help="FILTER: --FC Do not include features with C.V. above this threshold in statistical
#				test for differential expression [default %default]",
#				metavar="Coefficent of Variance cutoff"),

		#### peptide analysis specfic
		make_option(c("--FDeltaMassTolerancePrecursor"), type="character", default="AUTO SET",
				help="FILTER (LFQ PEP ONLY): --FD Precursor mass Error Range filter (ppm) [default %default].
				Peptide imports ONLY",
				metavar="Mass Range [x,y]"),

		#### protein analysis specfic
		make_option(c("--FNumberOfPeptidesPerProteinMin"), type="integer", default=1,
				help="FILTER: --FN Only include those proteins with at least x identified peptides [default %default]
				Protein analysis ONLY.",
				metavar="Number of peptides"),

		#### peptide analysis specfic
		make_option(c("--FSitesPerPeptide"), type="integer", default=99999,
				help="FILTER: --FS Max Nb. Modifications Per Peptide [default Inf]
						Peptide analysis ONLY.",
				metavar="Max Number of PTM sites Per Petptide"),

		#### peptide analysis specfic
		make_option(c("--FLengthPeptide"), type="integer", default=1,
				help="FILTER: --FL Min Peptide Length (Nb. AA's) [default 1]
						Peptide analysis ONLY.",
				metavar="Min Peptide Length (>=)"),

		####
		make_option(c("--FExclusivePeptides"), action="store_true", default=FALSE,
				help="FILTER: --FE Discard all peptides mapping to multiple protein entries [default %default]
			Note that by default all peptides are used for quantification and assigned to proteins using
			a Occam's Razor based algorithm.
				"),

		make_option(c("--FRatioCutOff"), type="double", default=1,
				help="FILTER: --FR Intensity ratio cut-off. [default %default]",
				metavar="Intensity ratio cutoff"),


# FILTER (--F) END

# TMT (--T)

		# correct tmt ratios
		make_option(c("--TAdjustRatios"), action="store_true", default=FALSE,
				help="TMT: --TA Adjust TMT ratios using calibration mix proteins [default %default]
						"),

# TMT (--T) END

# STATISTICS (--S)

	make_option(c("--SAnchorProtein"), type="character", default=".",
			help="STATISTICS: --SA Normalize Intensities by selected protein(s) Regular Expression
			 [default %default] (use all proteins).",
			metavar="Protein Accession Reg. expr."),

  make_option(c("--SMissingValuesImutationMethod"), type="character", default="knn",
            help="STATISTICS: --SM 'ppca', 'knn','gMin','lMin','gMean,'lMean',
            [default %default] (use all proteins).",
            metavar=" ppca: probabilistic pca (+ gMin, if not enough data)
                            knn: k-nearest neighbour (+ gMin, if not enough data)
                            gMin: global minimum
                            lMin: local minimum
                            gMean: global mean
                            lMean: local mean
            "),

  make_option(c("--SNonPairWiseStatTest"), action="store_true", default=FALSE,
            help="STATISTICS: --SN non pairwise eBayes moderated t-statistic p-values.
              I.e. variance is pooled, per protein/peptide, across all runs of the study [default %default]"),

  make_option(c("--SPvalueInclude"), action="store_true", default=FALSE,
            help="STATISTICS: --SP output eBayes moderated t-statistic p-values [default %default]"),

	make_option(c("--SRawDataAnalysis"), action="store_true", default=FALSE,
			help="STATISTICS: --SR No data normalization [default %default]"),

# STATISTICS (--S) END

# EXPERIMENTAL DESIGN (--E)

	make_option(c("--EXperimentalDesign"), type="character", default=NA,
			help='EXPERIMENTAL DESIGN: --EX "," seperated samples, ":" separated conditions
					Example: 1,2,3:4,5,6
					   condition1 (REF) : channel 1,2,3
					   condition2: channel 4,5,6
					Note: for 10-plex default is "1,4,7,10:2,5,8:3,6,9"
					[default %default]'),

	make_option(c("--EProteinQuantOff"), action="store_false", default=TRUE,
			help='EXPERIMENTAL DESIGN: --EP Disable Protein Level Quantification [default %default]'),

	make_option(c("--ECorrelatedSamples "), action="store_true", default=FALSE,
			help='EXPERIMENTAL DESIGN: --EC Apply "paired" statistical tests [default %default]'),

# EXPERIMENTAL DESIGN (--E) END

# PDF-REPORT (--P)

	make_option(c("--PQvalueCutOff"), type="double", default=0.01,
			help="PDF-REPORT: --PQ Qvalue cut-off used for graphics.
			High-lighting features with a qval < specified value. [0-1] [default %default]",
			metavar="Differential expression qvalue cutOff"),

# ADDITIONAL-REPORTS (--A)
	make_option(c("--ARDataFile"), action="store_true", default=FALSE,
		help="ADDITIONAL-REPORTS: --AR Save R objects in 'label'.RData file [default %default]"),

	make_option(c("--AIbaq"), action="store_true", default=FALSE,
			help="ADDITIONAL-REPORTS : --AI add iBAQ values to results spreadsheet. [default %default]"),

	make_option(c("--ATop3"), action="store_true", default=FALSE,
			help="ADDITIONAL-REPORTS : --AT add Top3 values to results spreadsheet. [default %default]"),

	make_option(c("--AQC"), action="store_true", default=FALSE,
			help="ADDITIONAL-REPORTS : --AQ adds additional QC plots to .pdf report [default %default]"),

# ADDITIONAL-REPORTS (--A) END

# TEST (peptide analysis specific)
	make_option(c("-t", "--test"), action="store_true", default=FALSE,
			help="TEST: test option, include first 2000 entries only [default %default]
			Peptide analysis ONLY."),
# TEST END
	make_option(c("-v", "--verbose"), action="store_true", default=FALSE,
			help="Print extra output [default %default]")
	)


#' Read User Specified Command Line Options
#' @param version Safequant version number
#' @return user options list
#' @import  optparse
#' @export
#' @note  No note
#' @details No details
#' @references NA
#' @examples print("No examples")
getUserOptions <- function(version=version){

	epilogue <- "Examples:
	Progenesis LFQ Protein Quant:
	>Rscript safeQuant.R -i /path/to/peptide_measurment.csv

	Progenesis LFQ Protein Quant (QE):
	>Rscript safeQuant.R -i /path/to/peptide_measurment.csv --FL 7

	Progenesis LFQ Phospho Quant:
	>Rscript safeQuant.R -i /path/to/peptide_measurment.csv -f /path/to/proteins.fasta --FM phospho --FS 3 --EP

	Scaffold Q+ TMT Protein Quant:
	>Rscript safeQuant.R -i /path/to/Raw_Export.xls --EX 1,2,3,4:5,6,7:8,9,10

	Scaffold Q+ TMT  PEPTIDE PTM Quant (PHOSHO):
	>Rscript safeQuant.R -i /path/to/Raw_Export.xls -p /path/to/Spectrum_Export_Scaffold_PTM.xls --EX 1,2,3,4:5,6,7:8,9,10 --FM phospho --FS 3 -f /path/to/proteins.fasta
	"

	# get command line options, if help option encountered print help and exit,
	# otherwise if options not found on command line then set defaults,
	cmdOpt <- parse_args(OptionParser( prog=paste("SafeQuant",version), option_list=option_list, epilogue=epilogue))

	### CMD OPTIONS END

	### SET USER OPTIONS
	userOptions <- list()

### VERBOSE
	#VERBOSE: verbose
	userOptions$verbose <- cmdOpt$verbose
### VERBOSE	END

# I/O
	#I/O: progenesisFilePath
	userOptions$inputFile <- cmdOpt$inputFile
	if( userOptions$inputFile == "" | !file.exists(userOptions$inputFile)){
		cat("ERROR. Please specify input file.",userOptions$inputFile, "Not found!","\n")
		q(status=-1)
	}

	#I/O: resultsFileLabel
	userOptions$resultsFileLabel <- cmdOpt$resultsFileLabel

	#I/O: outputDir
	userOptions$outputDir <- cmdOpt$outputDir
	if(is.na(userOptions$outputDir)){ # see default
		userOptions$outputDir <- dirname(userOptions$inputFile)
	}
	if(!file.exists(userOptions$outputDir) & userOptions$outputDir != "" ){
		cat("ERROR. No such directory",userOptions$outputDir,"\n")
		q(status=-1)
	}else{
		userOptions$outputDir <- file.path(userOptions$outputDir, userOptions$resultsFileLabel)
	}

	#I/O: proteinFastaFile
	userOptions$proteinFastaFile <- NA
	if(nchar(cmdOpt$fastaFile) > 0 ){
		### check if file exists
		if(file.exists(cmdOpt$fastaFile)){
			userOptions$proteinFastaFile <- cmdOpt$fastaFile
		}else{
			cat("ERROR. File does not exist",cmdOpt$fastaFile,"\n")
			q(status=-1)
		}
	}

	#I/O: scaffoldPTMSpectrumReportFile
	userOptions$scaffoldPTMSpectrumReportFile <- NA
	if(nchar(cmdOpt$scaffoldPTMSpectrumReportFile) > 0 ){
		### check if file exists
		if(file.exists(cmdOpt$scaffoldPTMSpectrumReportFile)){
			userOptions$scaffoldPTMSpectrumReportFile <- cmdOpt$scaffoldPTMSpectrumReportFile
		}else{
			cat("ERROR. File does not exist",cmdOpt$scaffoldPTMSpectrumReportFile,"\n")
			q(status=-1)
		}
	}

	#I/O: spreadsheetExportDelimiter
	if(cmdOpt$spreadsheetExportDelimiter == 1){
	  userOptions$sSheetExtension = "tsv"
	  userOptions$sSheetExportDelimiter = "\t"
	}else{
	  userOptions$sSheetExtension = "csv"
	  userOptions$sSheetExportDelimiter = ","
	}

# I/O END

# FILTER (--F)

	#FILTER: selectedProteinName
	userOptions$selectedProteinName <- cmdOpt$FProteinAccessionSelection

	#FILTER: selectedModifName
	userOptions$selectedModifName <- cmdOpt$FModificationSelection

	#FILTER: fdrCutoff
	userOptions$fdrCutoff <- cmdOpt$FFdrCutoff
	if(is.na(userOptions$fdrCutoff) | userOptions$fdrCutoff <= 0 | userOptions$fdrCutoff > 1 ){
		cat("ERROR. fdrCutOff must be in the range [0-1]. You specified",userOptions$fdrCutoff,"\n")
		q(status=-1)
	}

	#FILTER: precursorMassFilter
	if(cmdOpt$FDeltaMassTolerancePrecursor == "AUTO SET" ){
		userOptions$precursorMassFilter <- NA
	}else{
		### set by user -> add lower and upper mass bound to vector
		userOptions$precursorMassFilter <- gsub("(\\[)","",cmdOpt$FDeltaMassTolerancePrecursor)
		userOptions$precursorMassFilter <- gsub("(\\])","",userOptions$precursorMassFilter)
		userOptions$precursorMassFilter <- sort(as.numeric(unlist(strsplit(userOptions$precursorMassFilter,","))))

		### check input format precursorMassFilter
		if((length(userOptions$precursorMassFilter) != 2) | sum(is.na(userOptions$precursorMassFilter)) > 0 ){
			cat("ERROR. Invalid FDeltaMassTolerancePrecursor", userOptions$minNbPeptidesPerProt, "\n")
			q(status=-1)
		}
	}

	#FILTER: cvCutOff
#	userOptions$cvCutOff <- cmdOpt$FCoefficientOfVarianceMax
#	if(is.na(userOptions$cvCutOff) | (userOptions$cvCutOff < 0)){
#		print(paste("ERROR. cvCutOff must be > 0. You specified ", userOptions$cvCutOff))
#		q(status=-1)
#	}

	#FILTER: minNbPeptidesPerProt
	userOptions$minNbPeptidesPerProt <- cmdOpt$FNumberOfPeptidesPerProteinMin
	if(is.na(userOptions$minNbPeptidesPerProt) | userOptions$minNbPeptidesPerProt < 0 ){
		print(paste("ERROR. FNumberOfPeptidesPerProteinMin must be >= 0. You specified ", userOptions$minNbPeptidesPerProt))
		q(status=-1)
	}

	#FILTER: maxNbPTMsPerPeptide
	userOptions$maxNbPtmsPerPeptide <- cmdOpt$FSitesPerPeptide
	if(is.na(userOptions$maxNbPtmsPerPeptide) | userOptions$maxNbPtmsPerPeptide < 0 ){
		print(paste("ERROR. FSitesPerPeptide must be >= 0. You specified ", userOptions$maxNbPtmsPerPeptide))
		q(status=-1)
	}

	#FILTER: maxNbPTMsPerPeptide
	userOptions$minPeptideLength <- cmdOpt$FLengthPeptide
	if(is.na(userOptions$minPeptideLength) | userOptions$minPeptideLength < 0 ){
		print(paste("ERROR. FLengthPeptide must be >= 0. You specified ", userOptions$minPeptideLength))
		q(status=-1)
	}

	#FILTER: FExclusivePeptides
	userOptions$FExclusivePeptides <- cmdOpt$FExclusivePeptides

	#FILTER: ratioCutOff
	userOptions$ratioCutOff <- cmdOpt$FRatioCutOff
	if(is.na(userOptions$ratioCutOff) | userOptions$ratioCutOff < 1){
		cat("ERROR. ratioCutoff must be > 1. You specified",userOptions$ratioCutOff,"\n")
		q(status=-1)
	}

# FILTER (--F) END

# TMT
	userOptions$TAdjustRatios <- cmdOpt$TAdjustRatios

# TMT END


# STATISTICS

	#STATISTICS: normAC
	userOptions$normAC <- cmdOpt$SAnchorProtein

	#STATISTICS: SMissingValuesImutationMethod
	userOptions$SMissingValuesImutationMethod <- cmdOpt$SMissingValuesImutationMethod

	#STATISTICS: SNonPairWiseStatTest
	userOptions$SNonPairWiseStatTest <- cmdOpt$SNonPairWiseStatTest

	#STATISTICS: eBayes
	userOptions$eBayes <- cmdOpt$SPvalueInclude

	#STATISTICS: SRawDataAnalysis
	userOptions$SRawDataAnalysis <- cmdOpt$SRawDataAnalysis

# STATISTICS END

# EXPERIMENTAL DESIGN

	#EXPERIMENTAL DESIGN: EXperimentalDesign
	userOptions$expDesignTag <- cmdOpt$EXperimentalDesign

	userOptions$proteinQuant <- cmdOpt$EProteinQuant
	#userOptions$proteinQuant <- userOptions$selectedModifName != "."

	userOptions$ECorrelatedSamples <- cmdOpt$ECorrelatedSamples

# EXPERIMENTAL DESIGN END

# PDF-REPORT (--P)

	# PDF-REPORT: deFdrCutoff
	userOptions$deFdrCutoff <- cmdOpt$PQvalueCutOff
	if(is.na(userOptions$deFdrCutoff) | userOptions$deFdrCutoff <= 0 | userOptions$deFdrCutoff > 1 ){
		cat("ERROR. deFdrCutoff must be in the range [0-1]. You specified",userOptions$deFdrCutoff,"\n")
		q(status=-1)
	}

	# PDF-REPORT: PSelectedGraphics
#	userOptions$isDispExpDesign <- !regexpr("e",cmdOpt$PSelectedGraphics) > -1
#	userOptions$isFdrPlots <- !regexpr("f",cmdOpt$PSelectedGraphics) > -1
#	userOptions$isIntensityDistributionPlots <- !regexpr("i",cmdOpt$PSelectedGraphics) > -1
#	userOptions$isVolcanoPlots <- !regexpr("v",cmdOpt$PSelectedGraphics) > -1
#	userOptions$isHClustPlot <- !regexpr("h",cmdOpt$PSelectedGraphics) > -1
#	userOptions$isDeFdrPlot <- !regexpr("d",cmdOpt$PSelectedGraphics) > -1


# PDF-REPORT (--P) END

# TSV-REPORT (--T)
#
#	# TSV-REPORT: proteinFastaFile
#	userOptions$proteinFastaFile <- NA
#	if(nchar(cmdOpt$TFastaFile) > 0 ){
#		### check if file exists
#		if(file.exists(cmdOpt$TFastaFile)){
#			userOptions$proteinFastaFile <- cmdOpt$TFastaFile
#		}else{
#			cat("ERROR. File does not exist",cmdOpt$TFastaFile,"\n")
#			q(status=-1)
#		}
#	}
#
#	# TSV-REPORT: proteaseTarget	(deprecated)
#	userOptions$protease <- cmdOpt$TProtease

# TSV-REPORT (--T) END

# ADDITIONAL-REPORTS (--A)

	#ADDITIONAL-REPORTS iBaq
	userOptions$iBAQ <- cmdOpt$AIbaq

    #ADDITIONAL-REPORTS top3
	userOptions$top3 <- cmdOpt$ATop3

	#ADDITIONAL-REPORTS additional QC plots
	userOptions$addQC <- cmdOpt$AQC

	#ADDITIONAL-REPORTS rDataFile, isSaveRObject
	userOptions$isSaveRObject <- cmdOpt$ARDataFile
	#userOptions$rDataFile <- paste(userOptions$outputDir,userOptions$resultsFileLabel,".rData",sep="")


# ADDITIONAL-REPORTS (--A) END


# TEST

	### test run to define parameters (peptide analysis specific)
	userOptions$test <- cmdOpt$test

# TEST END
	return(userOptions)

}


#userInputTag <- "1,2,3:4,5,6"
# tag: 1,2:3:4,5,6
#condition isControl
#1 Condition 1      TRUE
#2 Condition 1      TRUE
#3 Condition 1     TRUE
#4 Condition 2     FALSE
#5 Condition 2     FALSE
#6 Condition 2     FALSE

#' Create experimental design data.frame from user input string
#' @param tag tag
#' @param expDesignDefault data.frame
#' @return data.frame describing experimental design
#' @export
#' @note  No note
#' @details  tag: 1,2:3:4,5,6
#'		condition isControl
#'	1 Condition 1 TRUE
#'	2 Condition 1 TRUE
#'	3 Condition 1 TRUE
#'	4 Condition 2 FALSE
#'	5 Condition 2 FALSE
#'	6 Condition 2 FALSE
#' @references NA
#' @examples print("No examples")
expDesignTagToExpDesign <- function(tag, expDesignDefault){

	sampleOrder <- as.numeric(unlist(strsplit(tag,"[\\,\\:]")))

	# make sure no duplicates, within range etc.
	if(is.na(sampleOrder[1])
			| (max(table(sampleOrder))>1)
			| (min(sampleOrder) < 1)
			| max(as.numeric(sampleOrder)) > nrow(expDesignDefault)
			){
		stop("ERROR: expDesignTagToExpDesign, INVALID EXPERIMENTAL DESIGN ",tag,"\n")

	}

	expDesign <- data.frame(row.names=sampleOrder,condition=rep(NA,length(sampleOrder)), isControl=rep(FALSE,length(sampleOrder))  )

	condNb <- 1
	for(cond in unlist(strsplit(tag,":"))){

		#cat(as.character(unlist(strsplit(cond,","))), paste("Condition",condNb) , "\n")
		expDesign[as.character(unlist(strsplit(cond,","))),]$condition <- paste("Condition",condNb,sep="")
		condNb <- condNb + 1
	}

	expDesign[ expDesign[,1] == "Condition1" ,]$isControl <- T
	expDesign[,1] <- as.factor(expDesign[,1]) ### has to be factor and not character

	### get original sample names
	rownames(expDesign) <- rownames(expDesignDefault)[as.numeric(rownames(expDesign))]
	#expDesignUser$condition <- expDesign[rownames(expDesignUser) ,]$condition

	# get original condition names, unless conditions have been split

	# if more conditions than originally use cond_1, cond_n labellinf @TODO can be done better, to avoid loosing org condition names
	if(length(unique(expDesign$condition)) > length(unique(expDesignDefault$condition))) return(expDesign)

	# check if conditions are split in new expDesign
	for(cond in unique(expDesign$condition)){
		runs <- rownames(expDesign)[expDesign$condition == cond]

		if(length(unique(expDesignDefault[runs,]$condition)) > 1){
			return(expDesign)
			#stop("SPLIT")
		}
	}

	### make sure a single condirion has not been split into two
	# I.e there should be just as many conditions before and anfter retreival of orginial condition names
	if(length(unique(	expDesign$condition)) == length(unique(	 expDesignDefault[rownames(expDesign),]$condition)) ){
		# get original condition names
		expDesign$condition <- expDesignDefault[rownames(expDesign),]$condition
	}

	return(expDesign)

}
eahrne/SafeQuant documentation built on April 8, 2021, 10:10 a.m.