R/stats_univariates.R

Defines functions LSD.test GetAovSigNum ContainInfiniteVolcano GetVolcanoSigColNames GetVolcanoSigRowNames GetVolcanoSigMat GetSigTable.Volcano GetTopInx GetVolcanoCmpdInxs GetVolcanoCmpds GetVolcanoRangeX GetVolcanoHlMat GetVolcanoVrMat GetVolcanoVlMat GetVolcanoUpMat GetVolcanoDnMat ContainInfiniteTT GetUnivReport GetTtestRes GetTtestSigFileName GetTtCmpds GetTtCmpdInxs GetTtDnMat GetTtUpMat GetTTSigColNames GetTTSigRowNames GetTTSigMat GetSigTable.TT GetAnovaSigFileName GetAnovaCmpdInxs GetAnovaCmpds GetAnovaDnMat GetAnovaUpMat GetSigTable.Anova GetAovPostHocSig GetAovSigColNames GetAovSigRowNames GetAovSigMat GetFCSigColNames GetFCSigRowNames GetFCSigMat GetSigTable.FC PlotCmpdView PlotANOVA ANOVA.Anal parseFisher parseTukey FisherLSD kwtest aof PlotVolcano Volcano.Anal PlotTT Ttests.Anal GetFC PlotFC FC.Anal.paired FC.Anal.unpaired

Documented in ANOVA.Anal aof FC.Anal.paired FC.Anal.unpaired FisherLSD GetFC GetSigTable.Anova GetSigTable.FC GetSigTable.TT GetSigTable.Volcano GetTopInx GetTtestRes GetTTSigMat GetUnivReport kwtest LSD.test parseFisher parseTukey PlotANOVA PlotCmpdView PlotFC PlotTT PlotVolcano Ttests.Anal Volcano.Anal

#'Fold change analysis, unpaired
#'@description Perform fold change analysis, method can be mean or median
#'@usage FC.Anal.unpaired(mSetObj, fc.thresh=2, cmp.type = 0)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param fc.thresh Fold-change threshold, numeric input
#'@param cmp.type Comparison type, 0 for group 1 minus group 2, and 1 for group 
#'1 minus group 2
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
FC.Anal.unpaired <- function(mSetObj=NA, fc.thresh=2, cmp.type = 0){
  
  mSetObj <- .get.mSet(mSetObj);
  
  # make sure threshold is above 1
  fc.thresh = ifelse(fc.thresh>1, fc.thresh, 1/fc.thresh);
  max.thresh = fc.thresh;
  min.thresh = 1/fc.thresh;
  
  res <- GetFC(mSetObj, F, cmp.type);
  fc.all <- res$fc.all;
  fc.log <- res$fc.log;
  
  imp.inx <- fc.all > max.thresh | fc.all < min.thresh;
  sig.mat <- cbind(fc.all[imp.inx, drop=F], fc.log[imp.inx, drop=F]);
  colnames(sig.mat) <- c("Fold Change", "log2(FC)");
  
  # order by absolute log value (since symmetrical in pos and neg)
  inx.ord <- order(abs(sig.mat[,2]), decreasing=T);
  sig.mat <- sig.mat[inx.ord,,drop=F];
  
  fileName <- "fold_change.csv";
  write.csv(sig.mat,file=fileName);
  
  # create a list object to store fc
  mSetObj$analSet$fc<-list (
    paired = FALSE,
    raw.thresh = fc.thresh,
    max.thresh = max.thresh,
    min.thresh = min.thresh,
    fc.all = fc.all, # note a vector
    fc.log = fc.log,
    inx.imp = imp.inx,
    sig.mat = sig.mat
  );
  return(.set.mSet(mSetObj));
}

#'Fold change analysis, paired
#'@description Perform paired fold change analysis
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param fc.thresh Fold-change threshold, numeric input
#'@param percent.thresh Numeric input, from 0 to 1 to indicate the significant count threshold
#'@param cmp.type Comparison type, 0 for group 1 minus group 2, and 1 for group 
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
FC.Anal.paired <- function(mSetObj=NA, fc.thresh=2, percent.thresh=0.75, cmp.type=0){
  
  mSetObj <- .get.mSet(mSetObj);
  
  # make sure threshold is above 1
  fc.thresh = ifelse(fc.thresh>1, fc.thresh, 1/fc.thresh);
  max.thresh = fc.thresh;
  min.thresh = 1/fc.thresh;
  
  fc.mat <- GetFC(mSetObj, T, cmp.type);
  
  count.thresh <- round(nrow(mSetObj$dataSet$norm)/2*percent.thresh);
  mat.up <- fc.mat >= log(max.thresh,2);
  mat.down <- fc.mat <= log(min.thresh,2);
  
  count.up <- apply(mat.up, 2, sum);
  count.down <- apply(mat.down, 2, sum);
  fc.all <- rbind(count.up, count.down);
  
  inx.up <- count.up>=count.thresh;
  inx.down <- count.down>=count.thresh;
  
  colnames(fc.all) <- colnames(mSetObj$dataSet$norm);
  rownames(fc.all) <- c("Count (up)", "Count (down)");
  sig.var <- t(fc.all[,(inx.up|inx.down), drop=F]);
  
  # sort sig.var using absolute difference between count(up)-count(down)
  sig.dff <- abs(sig.var[,1]-sig.var[,2])
  inx <- order(sig.dff, decreasing=T);
  sig.var <- sig.var[inx,,drop=F];
  
  fileName <- "fold_change.csv";
  write.csv(signif(sig.var,5),file=fileName);
  
  # create a list object to store fc
  mSetObj$analSet$fc <-list (
    paired = TRUE,
    fc.mat = fc.mat,
    raw.thresh = fc.thresh,
    max.thresh = count.thresh,
    min.thresh = -count.thresh,
    fc.all = fc.all, # note: a 2-row matrix!
    inx.up = inx.up,
    inx.down = inx.down,
    sig.mat = sig.var
  );
  return(.set.mSet(mSetObj));
}

#'Plot fold change 
#'@description Plot fold change analysis
#'@usage PlotFC(mSetObj=NA, imgName, format="png", dpi=72, width=NA)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param imgName Input a name for the plot
#'@param format Select the image format, "png", or "pdf". 
#'@param dpi Input the dpi. If the image format is "pdf", users need not define the dpi. For "png" images, 
#'the default dpi is 72. It is suggested that for high-resolution images, select a dpi of 300.  
#'@param width Input the width, there are 2 default widths, the first, width = NULL, is 10.5.
#'The second default is width = 0, where the width is 7.2. Otherwise users can input their own width.  
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
PlotFC <- function(mSetObj=NA, imgName, format="png", dpi=72, width=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
  }else{
    w <- width;
  }
  h <- w*6/8;
  
  mSetObj$imgSet$fc <- imgName;
  
  Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  
  par(mar=c(5,5,2,3));
  
  fc = mSetObj$analSet$fc;
  if(fc$paired){
    ylim <- c(-nrow(mSetObj$dataSet$norm)/2, nrow(mSetObj$dataSet$norm)/2);
    xlim <- c(0, ncol(mSetObj$dataSet$norm));
    plot(NULL, xlim=xlim, ylim=ylim, xlab = GetVariableLabel(mSetObj$dataSet$type),
         ylab=paste("Count with FC >=", fc$max.thresh, "or <=", fc$min.thresh));
    for(i in 1:ncol(fc$fc.all)){
      segments(i,0, i, fc$fc.all[1,i], col= ifelse(fc$inx.up[i],"magenta", "darkgrey"),
               lwd= ifelse(fc$inx.up[i], 2, 1));
      segments(i,0, i, -fc$fc.all[2,i], col= ifelse(fc$inx.down[i], "magenta", "darkgrey"),
               lwd= ifelse(fc$inx.down[i], 2, 1));
    }
    abline(h=fc$max.thresh, lty=3);
    abline(h=fc$min.thresh, lty=3);
    abline(h=0, lwd=1);
  }else{
    if(fc$raw.thresh > 0){
      # be symmetrical
      topVal <- max(abs(fc$fc.log));
      ylim <- c(-topVal, topVal);
      plot(fc$fc.log,  ylab="Log2 (FC)", ylim = ylim, xlab = GetVariableLabel(mSetObj$dataSet$type), pch=19, axes=F,
           col= ifelse(fc$inx.imp, "magenta", "darkgrey"));
      axis(2);
      axis(4); # added by Beomsoo
      abline(h=log(fc$max.thresh,2), lty=3);
      abline(h=log(fc$min.thresh,2), lty=3);
      abline(h=0, lwd=1);
    }else{ # plot side by side
      
      dat1 <- mSetObj$dataSet$norm[as.numeric(mSetObj$dataSet$cls) == 1, ];
      dat2 <- mSetObj$dataSet$norm[as.numeric(mSetObj$dataSet$cls) == 2, ];
      
      mns1 <- apply(dat1, 2, mean);
      mn1 <- mean(mns1);
      sd1 <- sd(mns1);
      msd1.top <- mn1 + 2*sd1;
      msd1.low <- mn1 - 2*sd1;
      
      mns2 <- apply(dat2, 2, mean);
      mn2 <- mean(mns2);
      sd2 <- sd(mns2);
      msd2.top <- mn2 + 2*sd2;
      msd2.low <- mn2 - 2*sd2;
      
      ylims <- range(c(mns1, mns2, msd1.top, msd2.top, msd1.low, msd2.low));
      new.mns <- c(mns1, rep(NA, 5), mns2);
      cols <- c(rep("magenta", length(mns1)), rep(NA, 5), rep("blue", length(mns2)));
      pchs <- c(rep(15, length(mns1)), rep(NA, 5), rep(19, length(mns2)));
      plot(new.mns, ylim=ylims, pch = pchs, col = cols, cex = 1.25, axes=F, ylab="");
      axis(2);
      axis(4); # added by Beomsoo
      abline(h=mn1, col="magenta", lty=3, lwd=2);
      abline(h=msd1.low, col="magenta", lty=3, lwd=1);
      abline(h=msd1.top, col="magenta", lty=3, lwd=1);
      abline(h=mn2, col="blue", lty=3, lwd=2);
      abline(h=msd2.low, col="blue", lty=3, lwd=1);
      abline(h=msd2.top, col="blue", lty=3, lwd=1);
      # abline(h=mean(all.mns), col="darkgrey", lty=3);
      axis(1, at=1:length(new.mns), labels=c(1:length(mns1),rep(NA, 5),1:length(mns2)));
    }
  }
  dev.off();
  
  return(.set.mSet(mSetObj));
}

#'Used by higher functions to calculate fold change 
#'@description Utility method to calculate FC, used in higher function
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param paired Logical, true of false
#'@param cmpType Numeric, 0 or 1
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
GetFC <- function(mSetObj=NA, paired=FALSE, cmpType){
  
  mSetObj <- .get.mSet(mSetObj);
  
  if(paired){
    if(mSetObj$dataSet$combined.method){
      data <- mSetObj$dataSet$norm;
    }else{
      data <- log(mSetObj$dataSet$row.norm,2);
    }
    
    G1 <- data[which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[1]), ]
    G2 <- data[which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[2]), ]
    
    if(cmpType == 0){
      fc.mat <- G1-G2;
    }else{
      fc.mat <- G2-G1;
    }
    return (fc.mat);
  }else{
    if(mSetObj$dataSet$combined.method){
      data <- mSetObj$dataSet$norm;
      m1 <- colMeans(data[which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[1]), ]);
      m2 <- colMeans(data[which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[2]), ]);
      
      # create a named matrix of sig vars for display
      if(cmpType == 0){
        fc.log <- signif (m1-m2, 5);
      }else{
        fc.log <- signif (m2-m1, 5);
      }
      fc.all <- signif(2^fc.log, 5);
    }else{
      data <- mSetObj$dataSet$row.norm;
      m1 <- colMeans(data[which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[1]), ]);
      m2 <- colMeans(data[which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[2]), ]);
      
      # create a named matrix of sig vars for display
      if(cmpType == 0){
        ratio <- m1/m2;
      }else{
        ratio <- m2/m1;
      }
      fc.all <- signif(ratio, 5);
      fc.log <- signif(log2(ratio), 5);
    }
    
    if(mSetObj$dataSet$combined.method){
      names(fc.all) <- names(fc.log) <- colnames(mSetObj$dataSet$norm);  
    }else{
      names(fc.all) <- names(fc.log) <- colnames(mSetObj$dataSet$row.norm) # make even vectors
    }
    return(list(fc.all = fc.all, fc.log = fc.log));
  }
}

#'Perform t-test analysis
#'@description This function is used to perform t-test analysis.
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param nonpar Logical, use a non-parametric test, T or F. False is default. 
#'@param threshp Numeric, enter the adjusted p-value (FDR) cutoff
#'@param paired Logical, is data paired (T) or not (F).
#'@param equal.var Logical, evaluates if the group variance is equal (T) or not (F). 
#'@param all_results Logical, if TRUE, returns T-Test analysis results
#'for all compounds. 
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
Ttests.Anal <- function(mSetObj=NA, nonpar=F, threshp=0.05, paired=FALSE, equal.var=TRUE, all_results=FALSE){
  
   mSetObj <- .get.mSet(mSetObj);

   if(.on.public.web & !nonpar & RequireFastUnivTests(mSetObj)){
        res <- PerformFastUnivTests(mSetObj$dataSet$norm, mSetObj$dataSet$cls, var.equal=equal.var);
   }else{
        res <- GetTtestRes(mSetObj, paired, equal.var, nonpar);
   }
   t.stat <- res[,1];
   p.value <- res[,2];
   names(t.stat) <- names(p.value) <- colnames(mSetObj$dataSet$norm);
  
   p.log <- -log10(p.value);
   fdr.p <- p.adjust(p.value, "fdr");
   
   if(all_results==TRUE){
     
     all.mat <- data.frame(signif(t.stat,5), signif(p.value,5), signif(p.log,5), signif(fdr.p,5));
     
     if(nonpar){
       tt.nm = "Wilcoxon Rank Test";  
       file.nm <- "wilcox_rank_all.csv"
       colnames(all.mat) <- c("V", "p.value", "-log10(p)", "FDR");
     }else{
       tt.nm = "T-Tests";
       file.nm <- "t_test_all.csv";
       colnames(all.mat) <- c("t.stat", "p.value", "-log10(p)", "FDR");
     }
     write.csv(all.mat, file=file.nm);
   }
  
   inx.imp <- fdr.p <= threshp;
  # if there is no sig cmpds, it will be errors, need to improve
  
  AddMsg(paste("A total of", sum(inx.imp), "significant features were found."));
  sig.num <- sum(inx.imp);
  
  if(sig.num > 0){
    sig.t <- t.stat[inx.imp];
    sig.p <- p.value[inx.imp];
    lod<- -log10(sig.p);
    sig.q <-fdr.p[inx.imp];
    
    sig.mat <- cbind(sig.t, sig.p, lod, sig.q);
    colnames(sig.mat) <- c("t.stat", "p.value", "-log10(p)", "FDR");
    ord.inx <- order(sig.p);
    sig.mat <- sig.mat[ord.inx,,drop=F];
    sig.mat <- signif(sig.mat, 5);
    
    if(nonpar){
      tt.nm = "Wilcoxon Rank Test";  
      file.nm <- "wilcox_rank.csv"
      colnames(sig.mat) <- c("V", "p.value", "-log10(p)", "FDR");
    }else{
      tt.nm = "T-Tests";
      file.nm <- "t_test.csv";
      colnames(sig.mat) <- c("t.stat", "p.value", "-log10(p)", "FDR");
    }
    write.csv(sig.mat, file=file.nm);
    
    tt <- list (
      tt.nm = tt.nm,
      sig.nm = file.nm,
      sig.num = sig.num,
      paired = paired,
      raw.thresh = threshp,
      t.score = sort(t.stat),
      p.value = sort(p.value),
      p.log = p.log,
      thresh = -log10(threshp), # only used for plot threshold line
      inx.imp = inx.imp,
      sig.mat = sig.mat
    );
  }else{
    tt <- list (
      sig.num = sig.num,
      paired = paired,
      raw.thresh = threshp,
      t.score = sort(t.stat),
      p.value = sort(p.value),
      p.log = p.log,
      thresh = -log10(threshp), # only used for plot threshold line
      inx.imp = inx.imp
    );
  }
  
  mSetObj$analSet$tt <- tt;
  
  if(.on.public.web){
    .set.mSet(mSetObj);
    return(sig.num);
  }
  return(.set.mSet(mSetObj));
}

#'Plot t-test 
#'@description Plot t-test
#'@usage PlotTT(mSetObj=NA, imgName, format="png", dpi=72, width=NA)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param imgName Input a name for the plot
#'@param format Select the image format, "png", or "pdf". 
#'@param dpi Input the dpi. If the image format is "pdf", users need not define the dpi. For "png" images, 
#'the default dpi is 72. It is suggested that for high-resolution images, select a dpi of 300.  
#'@param width Input the width, there are 2 default widths, the first, width = NULL, is 10.5.
#'The second default is width = 0, where the width is 7.2. Otherwise users can input their own width.   
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
PlotTT <- function(mSetObj=NA, imgName, format="png", dpi=72, width=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
  }else{
    w <- width;
  }
  h <- w*6/8;
  
  mSetObj$imgSet$tt <- imgName;
  
  Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(mSetObj$analSet$tt$p.log, ylab="-log10(p)", xlab=GetVariableLabel(mSetObj$dataSet$type), main=mSetObj$analSet$tt$tt.nm, pch=19,
       col= ifelse(mSetObj$analSet$tt$inx.imp, "magenta", "darkgrey"));
  abline(h=mSetObj$analSet$tt$thresh, lty=3);
  axis(4); 
  dev.off();
  return(.set.mSet(mSetObj));
}

#'Perform Volcano Analysis
#'@description Perform volcano analysis
#'@usage Volcano.Anal(mSetObj=NA, paired=FALSE, fcthresh, cmpType, percent.thresh, nonpar=F, threshp, equal.var=TRUE, pval.type="raw")
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param paired Logical, T if data is paired, F if data is not.
#'@param fcthresh Numeric, input the fold change threshold
#'@param cmpType Comparison type, 1 indicates group 1 vs group 2, and 2 indicates group 2 vs group 1
#'@param percent.thresh Only for paired data, numeric, indicate the significant count threshold 
#'@param nonpar Logical, indicate if a non-parametric test should be used (T or F)
#'@param threshp Numeric, indicate the p-value threshold
#'@param equal.var Logical, indicates if the group variance is equal (T) or unequal (F)
#'@param pval.type To indicate raw p-values, use "raw". To indicate FDR-adjusted p-values, use "fdr".  
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
Volcano.Anal <- function(mSetObj=NA, paired=FALSE, fcthresh, cmpType, percent.thresh, nonpar=F, threshp, equal.var=TRUE, pval.type="raw"){
  
  mSetObj <- .get.mSet(mSetObj);

  #### t-tests
   # check to see if already done by microservice
   if(.on.public.web & !nonpar & RequireFastUnivTests(mSetObj)){
       res <- PerformFastUnivTests(mSetObj$dataSet$norm, mSetObj$dataSet$cls, var.equal=equal.var);
   }else{
       res <- GetTtestRes(mSetObj, paired, equal.var, nonpar);
   }
  p.value <- res[,2];
  if(pval.type == "fdr"){
    p.value <- p.adjust(p.value, "fdr");
  }   
  inx.p <- p.value <= threshp;
  p.log <- -log10(p.value);
  
  ### fold change analysis
  # make sure threshold is above 1
  fcthresh = ifelse(fcthresh>1, fcthresh, 1/fcthresh);
  max.xthresh <- log(fcthresh,2);
  min.xthresh <- log(1/fcthresh,2);
  
  res <- GetFC(mSetObj, paired, cmpType);
  
  # create a named matrix of sig vars for display
  fc.log <- res$fc.log;
  fc.all <- res$fc.all;
  
  inx.up <- fc.log > max.xthresh;
  inx.down <- fc.log < min.xthresh;
  
  if(paired){
    count.thresh<-round(nrow(mSetObj$dataSet$norm)/2*percent.thresh);
    mat.up <- res >= max.xthresh;
    mat.down <- res <= min.xthresh;
    
    count.up <- apply(mat.up, 2, sum);
    count.down <- apply(mat.down, 2, sum);
    fc.all <- rbind(count.up, count.down);
    
    inx.up <- count.up>=count.thresh;
    inx.down <- count.down>=count.thresh;
    
    colnames(fc.all) <- colnames(mSetObj$dataSet$norm);
    rownames(fc.all) <- c("Count (up)", "Count (down)");
    
    # replace the count.thresh for plot
    max.xthresh <- count.thresh;
    min.xthresh <- -count.thresh;
  }
  
  # create named sig table for display
  inx.imp <- (inx.up | inx.down) & inx.p;
  if(paired){ 
    sig.var <- cbind(fc.all[1,][inx.imp,drop=F], fc.all[2,][inx.imp, drop=F], p.value[inx.imp, drop=F], p.log[inx.imp, drop=F]);
    if(pval.type == "fdr"){
      colnames(sig.var)<-c("Counts (up)","Counts (down)", "p.adjusted", "-log10(p)");
    }else{
      colnames(sig.var)<-c("Counts (up)","Counts (down)", "raw.pval", "-log10(p)");
    }
    # first order by count difference, then by log(p)
    dif.count<-abs(sig.var[,1]-sig.var[,2]);
    ord.inx<-order(dif.count, sig.var[,4], decreasing=T);
    sig.var<-sig.var[ord.inx,,drop=F];
    sig.var[,c(3,4)]<-signif(sig.var[,c(3,4)],5);
  }else{
    sig.var <- cbind(fc.all[inx.imp,drop=F], fc.log[inx.imp,drop=F], p.value[inx.imp,drop=F], p.log[inx.imp,drop=F]);
    if(pval.type == "fdr"){
      colnames(sig.var) <- c("FC", "log2(FC)", "p.ajusted", "-log10(p)");
    }else{
      colnames(sig.var) <- c("FC", "log2(FC)", "raw.pval", "-log10(p)");
    }
    # first order by log(p), then by log(FC)
    ord.inx <- order(sig.var[,4], abs(sig.var[,2]), decreasing=T);
    sig.var <- sig.var[ord.inx,,drop=F];
    sig.var <- signif(sig.var,5);
  }
  
  fileName <- "volcano.csv";
  write.csv(signif(sig.var,5), file=fileName);
  volcano <- list (
    raw.threshx = fcthresh,
    raw.threshy = threshp,
    paired = paired,
    max.xthresh = max.xthresh,
    min.xthresh = min.xthresh,
    thresh.y = -log10(threshp),
    fc.all = fc.all,
    fc.log = fc.log,
    fc.log.uniq = jitter(fc.log),
    inx.up = inx.up,
    inx.down = inx.down,
    p.log = p.log,
    inx.p = inx.p,
    sig.mat = sig.var
  );
  mSetObj$analSet$volcano <- volcano;
  return(.set.mSet(mSetObj));
}

#'Create volcano plot
#'@description For labelling interesting points, it is defined by the following rules:
#'need to be signficant (sig.inx) and or 2. top 5 p, or 2. top 5 left, or 3. top 5 right. 
#'@usage PlotVolcano(mSetObj=NA, imgName, plotLbl, format="png", dpi=72, width=NA)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param imgName Input a name for the plot
#'@param plotLbl Logical, plot labels, 1 for yes and 0 for no. 
#'@param format Select the image format, "png", or "pdf". 
#'@param dpi Input the dpi. If the image format is "pdf", users need not define the dpi. For "png" images, 
#'the default dpi is 72. It is suggested that for high-resolution images, select a dpi of 300.  
#'@param width Input the width, there are 2 default widths, the first, width = NULL, is 10.5.
#'The second default is width = 0, where the width is 7.2. Otherwise users can input their own width.   
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
PlotVolcano <- function(mSetObj=NA, imgName, plotLbl, format="png", dpi=72, width=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
  }else{
    w <- width;
  }
  h <- w*6/10;
  
  mSetObj$imgSet$volcano <- imgName;
  
  Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,3,4));
  vcn <- mSetObj$analSet$volcano;
  MyGray <- rgb(t(col2rgb("black")), alpha=40, maxColorValue=255);
  MyHighlight <- rgb(t(col2rgb("magenta")), alpha=80, maxColorValue=255);
  
  if(vcn$paired){
    xlim<-c(-nrow(mSetObj$dataSet$norm)/2, nrow(mSetObj$dataSet$norm)/2)*1.2;
    
    # merge fc.all two rows into one, bigger one win
    fc.all <- apply(vcn$fc.all, 2, function(x){ if(x[1] > x[2]){return(x[1])}else{return(-x[2])}})
    
    hit.inx <- vcn$inx.p & (vcn$inx.up | vcn$inx.down);
    plot(fc.all, vcn$p.log, xlim=xlim, pch=20, cex=ifelse(hit.inx, 1.2, 0.8),
         col = ifelse(hit.inx, MyHighlight, MyGray),
         xlab="Count of Significant Pairs", ylab="-log10(p)");
    
    sig.upInx <- vcn$inx.p & vcn$inx.up;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.up;
    fc.rtInx <- GetTopInx(vcn$fc.all[1,], 5, T);
    lblInx <- p.topInx & sig.upInx & fc.rtInx;
    if(plotLbl & sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(mSetObj$dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.all[1,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
    }
    
    sig.dnInx <- vcn$inx.p & vcn$inx.down;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.down;
    fc.leftInx <- GetTopInx(vcn$fc.all[2,], 5, T) & vcn$inx.down;
    lblInx <-p.topInx & sig.dnInx & fc.leftInx;
    if(plotLbl & sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(mSetObj$dataSet$norm)[lblInx],1,14) # some names may be too long
      text(-vcn$fc.all[2,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
    }
    
  }else{
    imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
    plot(vcn$fc.log, vcn$p.log, pch=20, cex=ifelse(imp.inx, 1.2, 0.7),
         col = ifelse(imp.inx, MyHighlight, MyGray),
         xlab="log2 (FC)", ylab="-log10(p)");
    
    sig.inx <- imp.inx;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.down);
    fc.leftInx <- GetTopInx(vcn$fc.log, 5, F);
    lblInx <-  sig.inx & (p.topInx | fc.leftInx);
    if(plotLbl &  sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(mSetObj$dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
    }
    
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.up);
    fc.rtInx <- GetTopInx(vcn$fc.log, 5, T);
    lblInx <- sig.inx & (p.topInx | fc.rtInx);
    if(plotLbl & sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(mSetObj$dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
    }
  }
  
  abline (v = vcn$max.xthresh, lty=3);
  abline (v = vcn$min.xthresh, lty=3);
  abline (h = vcn$thresh.y, lty=3);
  axis(4); # added by Beomsoo
  dev.off();
  return(.set.mSet(mSetObj));
}

#'ANOVA
#'@description Perform anova and only return p values and MSres (for Fisher's LSD)
#'@param x Input the data to perform ANOVA
#'@param cls Input class labels
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
aof <- function(x, cls) {
  aov(x ~ cls);
}


#'Kruskal-Wallis
#'@description Perform  Kruskal-Wallis Test
#'@param x Input data to perform Kruskal-Wallis
#'@param cls Input class labels
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
kwtest <- function(x, cls) {
  kruskal.test(x ~ cls);
}

#'Fisher for ANOVA
#'@description Perform  Fisher LSD for ANOVA, used in higher function 
#'@param aov.obj Input the anova object
#'@param thresh Numeric, input the alpha threshold 
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
FisherLSD <- function(aov.obj, thresh){
  LSD.test(aov.obj,"cls", alpha=thresh)
}

#'Return only the signicant comparison names
#'@description Return only the signicant comparison names, used in higher function 
#'@param tukey Input tukey output
#'@param cut.off Input numeric cut-off
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
parseTukey <- function(tukey, cut.off){
  inx <- tukey$cls[,"p adj"] <= cut.off;
  paste(rownames(tukey$cls)[inx], collapse="; ");
}

#'Return only the signicant comparison names
#'@description Return only the signicant comparison names, used in higher function 
#'@param fisher Input fisher object 
#'@param cut.off Numeric, set cut-off
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
parseFisher <- function(fisher, cut.off){
  inx <- fisher[,"pvalue"] <= cut.off;
  paste(rownames(fisher)[inx], collapse="; ");
}

#'Perform ANOVA analysis
#'@description ANOVA analysis
#'@usage ANOVA.Anal(mSetObj=NA, nonpar=F, thresh=0.05, post.hoc="fisher")
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param nonpar Logical, use a non-parametric test (T) or not (F)
#'@param thresh Numeric, from 0 to 1, indicate the p-value threshold
#'@param post.hoc Input the name of the post-hoc test, "fisher" or "tukey"
#'@param all_results Logical, if TRUE, it will output the ANOVA results for all compounds 
#'with no post-hoc tests performed.
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
ANOVA.Anal<-function(mSetObj=NA, nonpar=F, thresh=0.05, post.hoc="fisher", all_results=FALSE){

  mSetObj <- .get.mSet(mSetObj);
  
  sig.num <- 0;
  if(nonpar){
    aov.nm <- "Kruskal Wallis Test";
    anova.res <- apply(as.matrix(mSetObj$dataSet$norm), 2, kwtest, cls=mSetObj$dataSet$cls);
    
    #extract all p values
    res <- unlist(lapply(anova.res, function(x) {c(x$statistic, x$p.value)}));
    res <- data.frame(matrix(res, nrow=length(anova.res), byrow=T), stringsAsFactors=FALSE);
    
    fstat <- res[,1];
    p.value <- res[,2];
    
    names(fstat) <- names(p.value) <- colnames(mSetObj$dataSet$norm);
    fdr.p <- p.adjust(p.value, "fdr");
    
    #inx.imp <- p.value <= thresh;
    inx.imp <- fdr.p <= thresh;
    sig.num <- sum(inx.imp);
    
    if(sig.num > 0){ 
      sig.f <- fstat[inx.imp];
      sig.p <- p.value[inx.imp];
      fdr.p <- fdr.p[inx.imp];
      
      sig.mat <- data.frame(signif(sig.f,5), signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), 'NA');
      rownames(sig.mat) <- names(sig.p);
      colnames(sig.mat) <- c("chi.squared", "p.value", "-log10(p)", "FDR", "Post-Hoc");
      
      # order the result simultaneously
      ord.inx <- order(sig.p, decreasing = FALSE);
      sig.mat <- sig.mat[ord.inx,,drop=F];
      
      fileName <- "kw_posthoc.csv";
      my.mat <- sig.mat[,1:4];
      colnames(my.mat) <- c("chi_squared", "pval_KW", "-log10(p)", "FDR");
    }
  }else{
    aov.nm <- "One-way ANOVA";
    if(.on.public.web & RequireFastUnivTests(mSetObj)){
        res <- PerformFastUnivTests(mSetObj$dataSet$norm, mSetObj$dataSet$cls);
    }else{
        aov.res <- apply(as.matrix(mSetObj$dataSet$norm), 2, aof, cls=mSetObj$dataSet$cls);
        anova.res <- lapply(aov.res, anova);
    
        #extract all p values
        res <- unlist(lapply(anova.res, function(x) { c(x["F value"][1,], x["Pr(>F)"][1,])}));
        res <- data.frame(matrix(res, nrow=length(aov.res), byrow=T), stringsAsFactors=FALSE);
    }
    fstat <- res[,1];
    p.value <- res[,2];
    names(fstat) <- names(p.value) <- colnames(mSetObj$dataSet$norm);
    
    fdr.p <- p.adjust(p.value, "fdr");
    
    if(all_results==TRUE){
      all.mat <- data.frame(signif(p.value,5), signif(-log10(p.value),5), signif(fdr.p,5));
      rownames(all.mat) <- names(p.value);
      colnames(all.mat) <- c("p.value", "-log10(p)", "FDR");
      write.csv(all.mat, "anova_all_results.csv")
    }
    
    # do post-hoc only for signficant entries
    # inx.imp <- p.value <= thresh;
    inx.imp <- fdr.p <= thresh;
    sig.num <- sum(inx.imp);
    if(sig.num > 0){
      # note aov obj is not avaible using fast version
      # need to recompute using slower version for the sig ones
      if(.on.public.web & RequireFastUnivTests(mSetObj)){
        aov.imp <- apply(as.matrix(mSetObj$dataSet$norm[,inx.imp,drop=FALSE]), 2, aof, cls=mSetObj$dataSet$cls);
      }else{
        aov.imp <- aov.res[inx.imp];
      }
      sig.f <- fstat[inx.imp];
      sig.p <- p.value[inx.imp];
      fdr.p <- fdr.p[inx.imp];
      cmp.res <- NULL;
      post.nm <- NULL;
      if(post.hoc=="tukey"){
        tukey.res<-lapply(aov.imp, TukeyHSD, conf.level=1-thresh);
        cmp.res <- unlist(lapply(tukey.res, parseTukey, cut.off=thresh));
        post.nm = "Tukey's HSD";
      }else{
        fisher.res<-lapply(aov.imp, FisherLSD, thresh);
        cmp.res <- unlist(lapply(fisher.res, parseFisher, cut.off=thresh));
        post.nm = "Fisher's LSD";
      }
      
      # create the result dataframe,
      # note, the last column is string, not double
      
      sig.mat <- data.frame(signif(sig.f,5), signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), cmp.res);
      rownames(sig.mat) <- names(sig.p);
      colnames(sig.mat) <- c("f.value", "p.value", "-log10(p)", "FDR", post.nm);
      
      # order the result simultaneously
      ord.inx <- order(sig.p, decreasing = FALSE);
      sig.mat <- sig.mat[ord.inx,,drop=F];
      fileName <- "anova_posthoc.csv";
    }
  }
  
  AddMsg(paste(c("A total of", sum(inx.imp), "significant features were found."), collapse=" "));
  if(sig.num> 0){
    res <- 1;
    write.csv(sig.mat,file=fileName);
    aov<-list (
      aov.nm = aov.nm,
      sig.num = sig.num,
      sig.nm = fileName,
      raw.thresh = thresh,
      thresh = -log10(thresh), # only used for plot threshold line
      p.value = p.value,
      p.log = -log10(p.value),
      inx.imp = inx.imp,
      post.hoc = post.hoc,
      sig.mat = sig.mat
    );
  }else{
    res <- 0;
    aov<-list (
      aov.nm = aov.nm,
      sig.num = sig.num,
      raw.thresh = thresh,
      thresh = -log10(thresh), # only used for plot threshold line
      p.value = p.value,
      p.log = -log10(p.value),
      inx.imp = inx.imp
    );
  }
  mSetObj$analSet$aov <- aov;
  
  if(.on.public.web){
    .set.mSet(mSetObj);
    return(res);
  }else{
    return(.set.mSet(mSetObj));
  }
}

#'Plot ANOVA 
#'@description Plot ANOVA 
#'@usage PlotANOVA(mSetObj=NA, imgName, format="png", dpi=72, width=NA)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param imgName Input a name for the plot
#'@param format Select the image format, "png", or "pdf". 
#'@param dpi Input the dpi. If the image format is "pdf", users need not define the dpi. For "png" images, 
#'the default dpi is 72. It is suggested that for high-resolution images, select a dpi of 300.  
#'@param width Input the width, there are 2 default widths, the first, width = NULL, is 10.5.
#'The second default is width = 0, where the width is 7.2. Otherwise users can input their own width.   
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
PlotANOVA <- function(mSetObj=NA, imgName, format="png", dpi=72, width=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  lod <- mSetObj$analSet$aov$p.log;
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7;
  }else{
    w <- width;
  }
  h <- w*6/9;
  
  mSetObj$imgSet$anova <- imgName;
  
  Cairo::Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(lod, ylab="-log10(p)", xlab = GetVariableLabel(mSetObj$dataSet$type), main=mSetObj$analSet$aov$aov.nm, type="n");
  red.inx <- which(mSetObj$analSet$aov$inx.imp);
  blue.inx <- which(!mSetObj$analSet$aov$inx.imp);
  points(red.inx, lod[red.inx], bg="red", cex=1.2, pch=21);
  points(blue.inx, lod[blue.inx], bg="green", pch=21);
  abline (h=mSetObj$analSet$aov$thresh, lty=3);
  dev.off();
  return(.set.mSet(mSetObj));
}

#'Plot Compound View 
#'@description Plots a bar-graph of selected compound over groups 
#'@usage PlotCmpdView(mSetObj=NA, cmpdNm, format="png", dpi=72, width=NA)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param cmpdNm Input a name for the compound 
#'@param format Select the image format, "png", or "pdf". 
#'@param dpi Input the dpi. If the image format is "pdf", users need not define the dpi. For "png" images, 
#'the default dpi is 72. It is suggested that for high-resolution images, select a dpi of 300.  
#'@param width Input the width, there are 2 default widths, the first, width = NULL, is 10.5.
#'The second default is width = 0, where the width is 7.2. Otherwise users can input their own width.   
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export

PlotCmpdView <- function(mSetObj=NA, cmpdNm, format="png", dpi=72, width=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  if(.on.public.web){
    load_ggplot()
  }
  
  if(mSetObj$dataSet$type.cls.lbl=="integer"){
    cls <- as.factor(as.numeric(levels(mSetObj$dataSet$cls))[mSetObj$dataSet$cls]);
  }else{
    cls <- mSetObj$dataSet$cls;
  }
  
  imgName <- gsub("\\/", "_",  cmpdNm);
  imgName <- paste(imgName, "_dpi", dpi, ".", format, sep="");
  
  my.width <- 200;
  adj.width <- 90*length(levels(cls))+20;
  if(adj.width > my.width){
     my.width <- adj.width;
  }
  
  x <- mSetObj$dataSet$norm[, cmpdNm]
  y <- cls
  df <- data.frame(conc = x, class = y)
  col <- unique(GetColorSchema(mSetObj))
  
  Cairo::Cairo(file = imgName, dpi=dpi, width=my.width, height=325, type=format, bg="transparent");
  
  p <- ggplot2::ggplot(df, aes(x=class, y=conc, fill=class)) + geom_boxplot(notch=FALSE, outlier.shape = NA, outlier.colour=NA) + theme_bw() + geom_jitter(size=1)
  p <- p + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), legend.position = "none")
  p <- p + stat_summary(fun.y=mean, colour="yellow", geom="point", shape=18, size=3, show.legend = FALSE)
  p <- p + theme(text = element_text(size=15), plot.margin = margin(t=0.20, r=0.25, b=0.55, l=0.25, "cm"))
  p <- p + scale_fill_manual(values=col) + ggtitle(cmpdNm) + theme(axis.text.x = element_text(angle=45, hjust=1))
  p <- p + theme(plot.title = element_text(size = 13, hjust=0.5, face="bold"), axis.text = element_text(size=10))
  print(p)
  dev.off()
  
}

##############################################
##############################################
########## Utilities for web-server ##########
##############################################
##############################################

#'Sig Table for Fold-Change Analysis
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@export
GetSigTable.FC <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  GetSigTable(mSetObj$analSet$fc$sig.mat, "fold change analysis", mSetObj$dataSet$type);
}

GetFCSigMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(CleanNumber(mSetObj$analSet$fc$sig.mat));
}

GetFCSigRowNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  rownames(mSetObj$analSet$fc$sig.mat);
}

GetFCSigColNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  colnames(mSetObj$analSet$fc$sig.mat);
}

GetAovSigMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(CleanNumber(as.matrix(mSetObj$analSet$aov$sig.mat[, 1:4])));
}

GetAovSigRowNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  rownames(mSetObj$analSet$aov$sig.mat);
}

GetAovSigColNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  colnames(mSetObj$analSet$aov$sig.mat[, 1:4]);
}

GetAovPostHocSig <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$analSet$aov$sig.mat[,5];
}

#'Sig Table for Anova
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@export
GetSigTable.Anova <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  GetSigTable(mSetObj$analSet$aov$sig.mat, "One-way ANOVA and post-hoc analysis", mSetObj$dataSet$type);
}

GetAnovaUpMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  lod <- mSetObj$analSet$aov$p.log;
  red.inx<- which(mSetObj$analSet$aov$inx.imp);
  if(sum(red.inx) > 0){
    return(as.matrix(cbind(red.inx, lod[red.inx])));
  }else{
    return(as.matrix(cbind(-1, -1)));
  }
}

GetAnovaDnMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  lod <- mSetObj$analSet$aov$p.log;
  blue.inx <- which(!mSetObj$analSet$aov$inx.imp);
  if(sum(blue.inx) > 0){
    return(as.matrix(cbind(blue.inx, lod[blue.inx])));
  }else{
    return(as.matrix(cbind(-1, -1)));
  }
}

GetAnovaCmpds <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  names(mSetObj$analSet$aov$p.log);
}

GetAnovaCmpdInxs<-function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(1:length(mSetObj$analSet$aov$p.log));
}

GetAnovaSigFileName <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$analSet$aov$sig.nm;
}

#'Sig Table for T-test Analysis
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@export
GetSigTable.TT <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  GetSigTable(mSetObj$analSet$tt$sig.mat, "t-tests", mSetObj$dataSet$type);
}

#'T-test matrix
#'@description Return a double matrix with 2 columns - p values and lod
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)

GetTTSigMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(CleanNumber(mSetObj$analSet$tt$sig.mat));
}

GetTTSigRowNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  rownames(mSetObj$analSet$tt$sig.mat);
}

GetTTSigColNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  colnames(mSetObj$analSet$tt$sig.mat);
}

GetTtUpMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  lod <- mSetObj$analSet$tt$p.log;
  red.inx<- which(mSetObj$analSet$tt$inx.imp);
  if(sum(red.inx) > 0){
    return(as.matrix(cbind(red.inx, lod[red.inx])));
  }else{
    return(as.matrix(cbind(-1, -1)));
  }
}

GetTtDnMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  lod <- mSetObj$analSet$tt$p.log;
  blue.inx <- which(!mSetObj$analSet$tt$inx.imp);
  
  if(sum(blue.inx) > 0){
    return(as.matrix(cbind(blue.inx, lod[blue.inx])));
  }else{
    return(as.matrix(cbind(-1, -1)));
  }
}

GetTtCmpdInxs <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(1:length(mSetObj$analSet$tt$p.log));
}

GetTtCmpds <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  names(mSetObj$analSet$tt$p.log);
}

GetTtestSigFileName <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$analSet$tt$sig.nm;
}

#'Retrieve T-test p-values
#'@description Utility method to get p values
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param paired Default set to FALSE
#'@param equal.var Default set to TRUE
#'@param nonpar Use non-parametric tests, default is set to FALSE
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
GetTtestRes <- function(mSetObj=NA, paired=FALSE, equal.var=TRUE, nonpar=F){

    mSetObj <- .get.mSet(mSetObj);
    inx1 <- which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[1]);
    inx2 <- which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[2]);
    univ.test <- function(x){t.test(x[inx1], x[inx2], paired = paired, var.equal = equal.var)};
    if(nonpar){
        univ.test <- function(x){wilcox.test(x[inx1], x[inx2], paired = paired)};
    }
    my.fun <- function(x) {
        tmp <- try(univ.test(x));
        if(class(tmp) == "try-error") {
            return(c(NA, NA));
        }else{
            return(c(tmp$statistic, tmp$p.value));
        }
    }
    res <- apply(as.matrix(mSetObj$dataSet$norm), 2, my.fun);
    return(t(res));
}

#'Utility method to perform the univariate analysis automatically
#'@description The approach is computationally expensive,and fails more often 
#'get around: make it lazy unless users request, otherwise the default t-test will also be affected
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)

GetUnivReport <- function(mSetObj=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  paired <- mSetObj$analSet$tt$paired;
  threshp <- mSetObj$analSet$tt$raw.thresh;
  
  inx1 <- which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[1]);
  inx2 <- which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[2]);
  
  # output list (mean(sd), mean(sd), p-value, FoldChange, Up/Down) 
  univStat.mat <- apply(as.matrix(mSetObj$dataSet$norm), 2, function(x) {
    
    # normality test for each group
    # ks <- ks.test(x[inx1], x[inx2]); 
    if( var(x[inx1], na.rm=T) == 0 |var(x[inx2], na.rm=T) == 0 ){ # shapiro cannot work when all values are same
      method = "";
    }else{
      sw.g1 <- shapiro.test(x[inx1]); 
      sw.g2 <- shapiro.test(x[inx2]); 
      method <- ifelse( ((sw.g1$p.value <= 0.05) | (sw.g2$p.value <= 0.05)), "(W)","")
    }
    if (method == "(W)") {
      # wilcoxon test
      tmp <- try(wilcox.test(x[inx1], x[inx2], paired = paired));
    } else {
      # t-test
      equal.var <- TRUE;
      if(var(x, na.rm=TRUE) != 0) {
        anal.var <- var.test(x[inx1], x[inx2]);
        equal.var <- ifelse(anal.var$p.value <= 0.05, FALSE, TRUE);
      }
      
      tmp <- try(t.test(x[inx1], x[inx2], paired = paired, var.equal = equal.var));
    }
    if(class(tmp) == "try-error") {
      return(NA);
    }else{            
      mean1 <- mean(x[inx1]);
      mean2 <- mean(x[inx2]);
      sd1 <- sd(x[inx1]);
      sd2 <- sd(x[inx2]);
      p.value <- paste(ifelse(tmp$p.value < 0.0001, "< 0.0001", sprintf("%.4f", tmp$p.value,4))," ", method, sep="");
      p.value.origin <- tmp$p.value;
      foldChange <- mean1 / mean2;
      foldChange <- round(ifelse( foldChange >= 1, foldChange, (-1/foldChange) ), 2);
      upDown <- ifelse(mean1 > mean2, "Up","Down");
      
      univStat <- c(
        meanSD1   = sprintf("%.3f (%.3f)", mean1, sd1),
        meanSD2   = sprintf("%.3f (%.3f)", mean2, sd2),
        p.value = p.value,
        foldChange = foldChange,
        upDown  = upDown,
        p.value.origin = sprintf("%.5f", p.value.origin)
      );
      return(univStat);
    }
  })
  
  univStat.mat <- as.data.frame(t(univStat.mat));
  
  # add FDR/q-value
  q.value <- sprintf("%.4f", p.adjust(p=as.numeric(levels(univStat.mat$p.value.origin))[univStat.mat$p.value.origin], method='fdr'));
  univStat.mat <- cbind(univStat.mat[, c(1,2,3)], q.value, univStat.mat[, c(4,5)], univStat.mat[,6]);
  names(univStat.mat)[1] <- paste("Mean (SD) of ", levels(mSetObj$dataSet$cls)[1], sep='');
  names(univStat.mat)[2] <- paste("Mean (SD) of ", levels(mSetObj$dataSet$cls)[2], sep='');
  names(univStat.mat)[3] <- "p-value";
  names(univStat.mat)[4] <- "q-value (FDR)";
  names(univStat.mat)[5] <- "Fold Change";
  names(univStat.mat)[6] <- paste(levels(mSetObj$dataSet$cls)[1],"/", levels(mSetObj$dataSet$cls)[2], sep='');
  names(univStat.mat)[7] <- "p.value.origin";
  
  univStat.mat <- cbind(Name=rownames(univStat.mat), univStat.mat);
  rownames(univStat.mat) <- NULL
  
  ## generate univariate report file (univAnalReport.csv).
  ## mixed with t-test and wilcoxon test depend on each metabolite's distribution
  univAnal.mat <- univStat.mat;
  note.str <- paste("\n Univariate Analysis Result for each variable/metabolite\n\n",
                    "[NOTE]\n", 
                    "    p-value is calculated with t-test as a default.\n",
                    "    p-value with (W) is calculated by the Wilcoxon Mann Whitney test\n\n\n", sep='');
  
  cat(note.str, file="univAnalReport.csv", append=FALSE);
  write.table(univAnal.mat, file="univAnalReport.csv", append=TRUE, sep=",", row.names=FALSE);
  
  ## generate subset with the threshold (p-value)
  sigones <- which(as.numeric(as.character(univAnal.mat$p.value.origin)) <= threshp);
  
  sigDataSet.orig <- cbind(SampleID=rownames(mSetObj$dataSet$orig), Label=mSetObj$dataSet$cls, mSetObj$dataSet$orig[,c(sigones)])
  sigDataSet.norm <- cbind(SampleID=rownames(mSetObj$dataSet$orig), Label=mSetObj$dataSet$cls, mSetObj$dataSet$norm[,c(sigones)])
  
  write.table(sigDataSet.orig, file=paste("data_subset_orig_p", threshp, ".csv", sep=''), append=FALSE, sep=",", row.names=FALSE);
  write.table(sigDataSet.norm, file=paste("data_subset_norm_p", threshp, ".csv", sep=''), append=FALSE, sep=",", row.names=FALSE);
}

ContainInfiniteTT<-function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  if(sum(!is.finite(mSetObj$analSet$tt$sig.mat))>0){
    return("true");
  }
  return("false");
}

GetVolcanoDnMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  vcn <- mSetObj$analSet$volcano;
  imp.inx <- (vcn$inx.up | vcn$inx.down) & vcn$inx.p;
  blue.inx <- which(!imp.inx);
  
  if(sum(blue.inx)>0){
    xs <- vcn$fc.log.uniq[blue.inx]
    ys <- vcn$p.log[blue.inx];
    return(as.matrix(cbind(xs, ys)));
  }else{
    return(as.matrix(cbind(-1, -1)));
  }
}

GetVolcanoUpMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  vcn <- mSetObj$analSet$volcano;
  imp.inx <- (vcn$inx.up | vcn$inx.down) & vcn$inx.p;
  red.inx <- which(imp.inx);
  if(sum(red.inx)>0){
    xs <- vcn$fc.log.uniq[red.inx]
    ys <- vcn$p.log[red.inx];
    return(as.matrix(cbind(xs, ys)));
  }else{
    return(as.matrix(cbind(-1, -1)));
  }
}

GetVolcanoVlMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  vcn <- mSetObj$analSet$volcano;
  limy <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(vcn$min.xthresh, limy[1]), c(vcn$min.xthresh,limy[2])));
}

GetVolcanoVrMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  vcn <- mSetObj$analSet$volcano;
  limy <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(vcn$max.xthresh, limy[1]), c(vcn$max.xthresh,limy[2])));
}

GetVolcanoHlMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  vcn<-mSetObj$analSet$volcano;
  limx <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(limx[1], vcn$thresh.y), c(limx[2],vcn$thresh.y)));
}

GetVolcanoRangeX <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  range(mSetObj$analSet$volcano$fc.log.uniq);
}

GetVolcanoCmpds <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  names(mSetObj$analSet$volcano$fc.log);
}

GetVolcanoCmpdInxs <-function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$analSet$volcano$fc.log.uniq
}

#'Volcano indices
#'@description Get indices of top n largest/smallest number
#'@param vec Vector containing volcano indices
#'@param n Numeric
#'@param dec Logical, default set to TRUE
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
GetTopInx <- function(vec, n, dec=T){
  inx <- order(vec, decreasing = dec)[1:n];
  # convert to T/F vec
  vec<-rep(F, length=length(vec));
  vec[inx] <- T;
  return (vec);
}

#'Sig table for Volcano Analysis
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@export
GetSigTable.Volcano <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  GetSigTable(mSetObj$analSet$volcano$sig.mat, "volcano plot", mSetObj$dataSet$type);
}

GetVolcanoSigMat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(CleanNumber(mSetObj$analSet$volcano$sig.mat));
}

GetVolcanoSigRowNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(rownames(mSetObj$analSet$volcano$sig.mat));
}

GetVolcanoSigColNames <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(colnames(mSetObj$analSet$volcano$sig.mat));
}

ContainInfiniteVolcano <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  if(sum(!is.finite(mSetObj$analSet$volcano$sig.mat))>0){
    return("true");
  }
  return("false");
}

GetAovSigNum <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(mSetObj$analSet$aov$sig.num);
}


#'Calculate Fisher's Least Significant Difference (LSD)
#'@description Adapted from the 'agricolae' package
#'@param y Input Y
#'@param trt Input trt
#'@param alpha Numeric, default is 0.05
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)

LSD.test <- function(y, trt, alpha = 0.05){
  clase<-c("aov","lm")
  name.y <- paste(deparse(substitute(y)))
  name.t <- paste(deparse(substitute(trt)))
  if("aov"%in%class(y) | "lm"%in%class(y)){
    A<-y$model
    DFerror<-df.residual(y)
    MSerror<-deviance(y)/DFerror
    y<-A[,1]
    ipch<-pmatch(trt,names(A))
    name.t <-names(A)[ipch]
    trt<-A[,ipch]
    name.y <- names(A)[1]
  }
  junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
  means <- tapply.stat(junto[, 1], junto[, 2], stat="mean") #change
  sds <- tapply.stat(junto[, 1], junto[, 2], stat="sd")     #change
  nn <- tapply.stat(junto[, 1], junto[, 2], stat="length")  #change
  std.err <- sds[, 2]/sqrt(nn[, 2])
  Tprob <- qt(1 - alpha/2, DFerror)
  LCL <- means[,2]-Tprob*std.err
  UCL <- means[,2]+Tprob*std.err
  means <- data.frame(means, std.err, replication = nn[, 2], LCL, UCL)
  names(means)[1:2] <- c(name.t, name.y)
  #row.names(means) <- means[, 1]
  ntr <- nrow(means)
  nk <- choose(ntr, 2)
  nr <- unique(nn[, 2])
  
  comb <- combn(ntr, 2)
  nn <- ncol(comb)
  dif <- rep(0, nn)
  LCL1<-dif
  UCL1<-dif
  sig<-NULL
  pvalue <- rep(0, nn)
  for (k in 1:nn) {
    i <- comb[1, k]
    j <- comb[2, k]
    if (means[i, 2] < means[j, 2]){
      comb[1, k]<-j
      comb[2, k]<-i
    }
    dif[k] <- abs(means[i, 2] - means[j, 2])
    sdtdif <- sqrt(MSerror * (1/means[i, 4] + 1/means[j,4]))
    pvalue[k] <- 2 * (1 - pt(dif[k]/sdtdif, DFerror));
    pvalue[k] <- round(pvalue[k],6);
    LCL1[k] <- dif[k] - Tprob*sdtdif
    UCL1[k] <- dif[k] + Tprob*sdtdif
    sig[k]<-" "
    if (pvalue[k] <= 0.001) sig[k]<-"***"
    else  if (pvalue[k] <= 0.01) sig[k]<-"**"
    else  if (pvalue[k] <= 0.05) sig[k]<-"*"
    else  if (pvalue[k] <= 0.1) sig[k]<-"."
  }
  tr.i <- means[comb[1, ],1]
  tr.j <- means[comb[2, ],1]
  output<-data.frame("Difference" = dif, pvalue = pvalue,sig,LCL=LCL1,UCL=UCL1)
  rownames(output)<-paste(tr.i,tr.j,sep=" - ");
  output;
}


tapply.stat <-function (y, x, stat = "mean"){
  cx<-deparse(substitute(x))
  cy<-deparse(substitute(y))
  x<-data.frame(c1=1,x)
  y<-data.frame(v1=1,y)
  nx<-ncol(x)
  ny<-ncol(y)
  namex <- names(x)
  namey <- names(y)
  if (nx==2) namex <- c("c1",cx)
  if (ny==2) namey <- c("v1",cy)
  namexy <- c(namex,namey)
  for(i in 1:nx) {
    x[,i]<-as.character(x[,i])
  }
  z<-NULL
  for(i in 1:nx) {
    z<-paste(z,x[,i],sep="&")
  }
  w<-NULL
  for(i in 1:ny) {
    m <-tapply(y[,i],z,stat)
    m<-as.matrix(m)
    w<-cbind(w,m)
  }
  nw<-nrow(w)
  c<-rownames(w)
  v<-rep("",nw*nx)
  dim(v)<-c(nw,nx)
  for(i in 1:nw) {
    for(j in 1:nx) {
      v[i,j]<-strsplit(c[i],"&")[[1]][j+1]
    }
  }
  rownames(w)<-NULL
  junto<-data.frame(v[,-1],w)
  junto<-junto[,-nx]
  names(junto)<-namexy[c(-1,-(nx+1))]
  return(junto)
}
xia-lab/MetaboAnalystR3.0 documentation built on May 6, 2020, 11:03 p.m.