R/One2OneAnalysisHelpers.R

Defines functions NormalizeWithMedianPQMatrix ImputeValuesInProtMatrixForRowsWithZeros RemoveRowsWithZerosFromProtMatrix DoCorrelationOnMatrix Do2grpTtestRobustOnMatrixAndBHcorrWithThresholdAndFoldChangeAndR Do2grpTtestOnMatrixAndBHcorrReturnAllInternalTrafo RobustFtest.returnpValue RobustTtest.returnpValue .fgcz_render_One2OneReport fgcz_render_One2OneReport

Documented in fgcz_render_One2OneReport

#' perform rendering
#' @importFrom utils Stangle Sweave head packageVersion read.csv read.table write.table
#' @export
fgcz_render_One2OneReport <- function(maxquanttxtdirectory = '.', reportFileBaseName = 'fgcz_MQ_QC_report'){

  RMD_QC1To1_Old(maxquanttxtdirectory)
  # 2018-08-17 -> convention and cleaning with CP
  # e.g. mq_msms_filename
  mq_msms_filename <- "msms.txt"
  mq_summary_filename <- "summary.txt"
  mq_evidence_filename <- "evidence.txt"
  mq_proteinGroups_filename <- "proteinGroups.txt"
  mq_parameters_filename <- "parameters.txt"
  mq_peptides_filename <- "peptides.txt"

  texFile <- paste(reportFileBaseName, "tex", sep='.')
  RnwFile <- paste(reportFileBaseName, "Rnw", sep=".")

  projectID <<- "xxxx"
  orderID <<- "xxxx"

  message("Reading in txt tables from maxquant ...")
  mq_msms <<- read.table(mq_msms_filename, header = TRUE, sep ="\t", stringsAsFactors = TRUE)
  mq_summary <<- read.table(mq_summary_filename, header = FALSE, sep ="\t", stringsAsFactors = TRUE)
  mq_evidence <<- read.table(mq_evidence_filename, header = TRUE, sep ="\t", stringsAsFactors = TRUE)
  mq_proteinGroups <<- read.csv(mq_proteinGroups_filename, sep ="\t", stringsAsFactors = FALSE, header = TRUE)
  mq_parameters <<- read.table(mq_parameters_filename, header = TRUE, sep = "\t", stringsAsFactors = TRUE)
  mq_peptides <<- read.csv(mq_peptides_filename, sep ="\t", stringsAsFactors = FALSE, header = TRUE)

  message("Preprocessing ...")
  mq_proteinGroups_intensities <<- mq_proteinGroups[,grep("^Intensity\\.", colnames(mq_proteinGroups))]
  rownames(mq_proteinGroups_intensities) <- mq_proteinGroups$Majority.protein.IDs

  bool_moreThanOnePeptide <<- mq_proteinGroups$Razor...unique.peptides > 1

  # get Sample QC running
  message("Now Sweaving and LaTeXing ...")
  Stangle(RnwFile)
  Sweave(RnwFile)
  tools::texi2dvi(texFile, pdf = TRUE)
}



.fgcz_render_One2OneReport <- function(maxquanttxtdirectory = '.', reportFileBaseName = 'MQ_sampleQC_overview'){

   RMD_QC1To1_Old(maxquanttxtdirectory)

   msmsName <- "msms.txt"
   summary <- "summary.txt"
   evidence <- "evidence.txt"
   proteinGroups <- "proteinGroups.txt"
   parameters <- "parameters.txt"
   peptides <- "peptides.txt"

   texFile <- paste(reportFileBaseName, "tex", sep='.')
   RnwFile <- paste(reportFileBaseName, "Rnw", sep=".")

   projectID <<- "xxxx"
   orderID <<- "xxxx"

   msms_d <<- read.table(msmsName, header = T, sep="\t", stringsAsFactors = TRUE)
   summ <<- read.table(summary, header = F, sep="\t", stringsAsFactors = TRUE)
   evi_d <<- read.table(evidence, header = T, sep="\t", stringsAsFactors = TRUE)

   Fulldat <<- read.csv(proteinGroups, sep="\t", stringsAsFactors = FALSE, header = TRUE)

   dat <<- Fulldat[,grep("^Intensity\\.", colnames(Fulldat))]

   rownames(dat) <- Fulldat$Majority.protein.IDs

   bool_moreThanOnePeptide <<- Fulldat$Razor...unique.peptides > 1

   params <<- read.table(parameters, header = TRUE, sep = "\t")

   # peptides
   pepts <<- read.csv(peptides, sep="\t", stringsAsFactors = FALSE, header = TRUE)

   # this is for witold to be replaced
   #fixedProteingroups <- "proteinGroups_FGCZ2grp_Intensity.txt"
   #dat <- read.table(fixedProteingroups, header=T, sep="\t",row.names=1)

   # get it committed

   # get Sample QC running
   Stangle(RnwFile)
   Sweave(RnwFile)
   tools::texi2dvi(texFile, pdf = TRUE)
}


####

# January 2014
# some changes along 2015
# around Feb 2015Hubis smart test
# August 2015 .. more options for some functions
# Nov 2015 .. paired t-test
# Dez 2015 .. Multi-group analysis
# January 2014
# some changes along 2015
# around Feb 2015Hubis smart test
# August 2015 .. more options for some functions
# Nov 2015 .. paired t-test
# Dez 2015 .. Multi-group analysis
# Jan 2016 .. Boxplot for 2 groups


# Jonas Grossmann <jg@fgcz.ethz.ch>

###################################################
### Function for ProteinQuantMatrix
###################################################

#library(affy)
#library(missForest)
#library(gplots)
#library(limma)
#library(genefilter)
#library(beeswarm)
#library(quantable)


#RobustTtest
#CHECKED
RobustTtest.returnpValue <- function(grpOne, grpTwo) {
  obj<-try(t.test(grpOne, grpTwo), silent=TRUE)
  if (is(obj, "try-error")) return(NA) else return(obj$p.value)
}

#Add more groups for comparisons
RobustFtest.returnpValue <- function(quantMatrix, groupFactorScenario) {
  obj<-try(rowFtests(quantMatrix, groupFactorScenario), silent=TRUE)
  if (is(obj, "try-error")) return(NA) else return(obj$p.value)
}


# 2grps
Do2grpTtestOnMatrixAndBHcorrReturnAllInternalTrafo = function(ProtQuantMatrix_rn, bool_TrafoHere = TRUE){
  reps <- ncol(ProtQuantMatrix_rn)/2
  grp_1 <- ProtQuantMatrix_rn[,1:reps]
  grp_2 <- ProtQuantMatrix_rn[,(reps+1):ncol(ProtQuantMatrix_rn)]

  #mean vectors
  grp1_means <- vector()
  grp2_means <- vector()
  for (i in 1:nrow(ProtQuantMatrix_rn)) {
    grp1_means[i] <- rowMeans(grp_1[i,], na.rm=TRUE)
    grp2_means[i] <- rowMeans(grp_2[i,], na.rm=TRUE)
  }
  #ttest
  pValueVector <- vector()
  if (bool_TrafoHere == TRUE) {
    for (i in 1:nrow(ProtQuantMatrix_rn)) {
      pValueVector[i] <- RobustTtest.returnpValue(asinh(grp_1[i,]),asinh(grp_2[i,]))
    }
  } else {
    for (i in 1:nrow(ProtQuantMatrix_rn)) {
      pValueVector[i] <- RobustTtest.returnpValue(grp_1[i,],grp_2[i,])
    }
  }

  #multipleTestingCorrection
  fdrValues <- vector()
  fdrValues <- p.adjust(pValueVector, method="fdr")
  #FC
  log2FCvector <- vector()
  for (i in 1:nrow(ProtQuantMatrix_rn)) {
    log2FCvector[i] <- log(grp1_means[i]/grp2_means[i],2)
  }
  proteinNamesPValFCs<- data.frame(matrix(ncol = 4, nrow(ProtQuantMatrix_rn)))
  colnames(proteinNamesPValFCs) <- c("ProtName","pValue","fdr","log2FC")
  proteinNamesPValFCs <- cbind(row.names(ProtQuantMatrix_rn), pValueVector, fdrValues, log2FCvector)
  return(proteinNamesPValFCs)
}


#ProtQuantMatrix_rn <- i_dat
# CHECKED
#ProtQuantMatrix_rn <- mat
#SignificanceThreshold=0.01
#LinFoldChangeThreshold=2
#bool_TrafoHere = TRUE
Do2grpTtestRobustOnMatrixAndBHcorrWithThresholdAndFoldChangeAndReturnOnlySignificantsInternalTrafo = function(ProtQuantMatrix_rn, SignificanceThreshold=0.01, LinFoldChangeThreshold=2, bool_TrafoHere=TRUE){
  FoldChangeThreshold <- abs(log(LinFoldChangeThreshold,2))
  reps <- ncol(ProtQuantMatrix_rn)/2
  grp_1 <- ProtQuantMatrix_rn[,1:reps, drop=FALSE]
  grp_2 <- ProtQuantMatrix_rn[,(reps+1):ncol(ProtQuantMatrix_rn), drop=FALSE]

  #mean vectors
  grp1_means <- rowMeans(grp_1, na.rm=TRUE)
  grp2_means <- rowMeans(grp_2, na.rm=TRUE)
  #ttest
  pValueVector <- vector()
  if (bool_TrafoHere == TRUE) {
    for (i in 1:nrow(ProtQuantMatrix_rn)) {
      pValueVector[i] <- RobustTtest.returnpValue(asinh(grp_1[i,]),asinh(grp_2[i,]))
    }
  } else {
    for (i in 1:nrow(ProtQuantMatrix_rn)) {
      pValueVector[i] <- RobustTtest.returnpValue(grp_1[i,],grp_2[i,])
    }
  }

  #multipleTestingCorrection
  fdrValues <- vector()
  fdrValues <- p.adjust(pValueVector, method="fdr")
  #FC
  log2FCvector <- vector()
  for (i in 1:nrow(ProtQuantMatrix_rn)) {
    log2FCvector[i] <- NA
    #here a try..
    log2FCvector[i] <- log(grp1_means[i]/grp2_means[i],2)
  }
  boolIncludeIt_vec<- vector()
  #df with 3 cols and rownames
  for (i in 1:nrow(ProtQuantMatrix_rn)) {
    if (is.finite(log2FCvector[i]) & abs(log2FCvector[i]) > FoldChangeThreshold & is.finite(pValueVector[i]) & pValueVector[i] <  SignificanceThreshold) {
      boolIncludeIt_vec[i] <- TRUE
    } else {
      boolIncludeIt_vec[i] <- FALSE
    }
  }
  proteinNamesPValFCs<- data.frame(matrix(ncol = 4, nrow = sum(boolIncludeIt_vec)))
  proteinNamesPValFCs <- cbind(row.names(ProtQuantMatrix_rn)[boolIncludeIt_vec], pValueVector[boolIncludeIt_vec], fdrValues[boolIncludeIt_vec], log2FCvector[boolIncludeIt_vec])
  return(proteinNamesPValFCs)
}

#ProtQuantMatrix_rn <- mat
# CHECKED
DoCorrelationOnMatrix = function(ProtQuantMatrix_rn, ...){
  PQmat <- na.omit(as.matrix(ProtQuantMatrix_rn))
  ProtcorrMatrix <- cor(PQmat)
  heatmap.2(as.matrix(ProtcorrMatrix),margin=c(10,10),trace="none", ...)
}

#CHECKED
# remove rows with Zeros
RemoveRowsWithZerosFromProtMatrix = function(ProtQuantMatrix_rn){
  #Only Look at Not-Zero-Proteins
  boolZeroProtein <- vector()
  for (i in 1:nrow(ProtQuantMatrix_rn)) {
    if (sum(ProtQuantMatrix_rn[i,] == 0) == 0) {
      boolZeroProtein[i]<-TRUE
    } else {
      boolZeroProtein[i]<-FALSE
    }
  }
  ProtQuantMatrix_rn_noZeros <- ProtQuantMatrix_rn[boolZeroProtein,]
  return(ProtQuantMatrix_rn_noZeros)
}

#CHECKED
# Impute Values for Zeros
ImputeValuesInProtMatrixForRowsWithZeros = function(ProtQuantMatrix_rn){
  NumSamples <- ncol(ProtQuantMatrix_rn)
  #replace zeros with NA
  ProtQuantMatrix_rn[ProtQuantMatrix_rn==0] <- NA
  #  library(missForest)
  dataimpute <- missForest::missForest(ProtQuantMatrix_rn)$ximp
  ProtQuantMatrix_rn_imputed <- dataimpute
  colnames(ProtQuantMatrix_rn_imputed) = colnames(ProtQuantMatrix_rn)
  return(ProtQuantMatrix_rn_imputed)
}


# Normalization
# CHECKED
NormalizeWithMedianPQMatrix = function(ProtQuantMatrix_rn){
  maxMedian <- max(apply(na.omit(ProtQuantMatrix_rn[,1:ncol(ProtQuantMatrix_rn)]),2,median))
  ScaleFactorsMed <- 1/(apply(na.omit(ProtQuantMatrix_rn[,1:ncol(ProtQuantMatrix_rn)]),2,median)/maxMedian)
  write.table(ScaleFactorsMed, "appliedScaleFactors_Median.txt",row.names=TRUE,col.names=FALSE)
  nPQmatrix <- ProtQuantMatrix_rn
  for (i in 1:ncol(ProtQuantMatrix_rn)) {
    nPQmatrix[,i] <- ProtQuantMatrix_rn[,i]*ScaleFactorsMed[i]
  }
  return(nPQmatrix)
}
protViz/SRMService documentation built on Nov. 13, 2021, 9:58 a.m.