R/doPlotFCT.R
In ABarray: Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.

#- $Id: doPlotFCT.R,v 1.1.1.1 2006/06/06 22:06:37 sunya Exp $

#- Main function to perform fold change and t test.

"doPlotFCT" <-
function(eset, group, grpMember, order1=NULL,order2=NULL, detectSample = 0.5, snThresh = 3, ...) {
   #- snThresh S/N threshold value for filtering
   #- detectSample Percentage of samples that meet snThresh for t test analysis.
  paired <- FALSE
  filePrefix <- paste("DetectPct", detectSample * 100, sep = "")
  if(!is.null(order1) && !is.null(order2)) {
    filePrefix <- paste("paired_", filePrefix, sep = "")
    paired <- TRUE
    if(length(order1) != length(order2)) {
      cat("Number of samples for order1 and order2 is not equal,\n",
          "paired t test cannot be performed.\n")
      return()
    }
  }

  probeLimit <- 800
  binColor <- c("cyan", "green", "blue", "black", "red")
  fcBin <- list("FC 1.0-1.2" = c(1.0, 1.2), "FC 1.2-1.6" = c(1.2, 1.6),
                "FC 1.6-2.0" = c(1.6, 2.0),
                "FC 2.0-4.0" = c(2.0, 4.0), "FC > 4" = c(4.0, Inf))
  pValPreset <- c(0.01, 0.05)
  fdrPreset <- c(0.01, 0.05, 0.1, 0.25)
  
  #- Process additional arguments for sweave and hidden variables, ...
  addParam <- list(...)
  if(is.element("probeLimit", names(addParam))) {
    probeLimit <- addParam$probeLimit
  }
  if(is.element("binColor", names(addParam))) {
    if(length(addParam$binColor) == length(fcBin)) {
      binColor <- addParam$binColor
    }
  }
  if(is.element("pValPreset", names(addParam))) {
    pValPreset <- addParam$pValPreset
  }
  if(is.element("fdrPreset", names(addParam))) {
    fdrPreset <- addParam$fdrPreset
  }

  
  kWidth <- 800  #- default figure size
  kHeight <- 600
  
  fdpPreset <- c(pValPreset, fdrPreset)
  fdpName <- c(rep("pVal", length(pValPreset)), rep("FDR", length(fdrPreset)))
  fdpResult <- rep(0, length(fdpPreset))
   
  pd <- pData(eset)
  if(missing(grpMember)) {
    grpMember <- levels(factor(pd[, colnames(pd) == group]))
  }
  subgrpCount <- length(grpMember)

  if(subgrpCount <= 1) {
    print("Only one group member available, no t test will be performed")
    return()
  }

  eset <- getMemberEset(eset, group, grpMember)

  #- Check to see if there is S/N ratio values
  snPresent <- TRUE
  sn <- assayDataElement(eset, "snDetect")
  if(dim(sn)[2] <= 1 || sum(is.na(sn[,])) > 1) {
    snPresent <- FALSE
  }

	#- Create a directory to put all the figures in
  resultDir <- paste("Result_", gsub(" ", "", group), "/", sep = "")
  if(! file.exists(resultDir)) {
    dir.create(resultDir, showWarnings = FALSE)
  }
  dataDir <- paste(resultDir, "DataResult/", sep = "")
  pdfDir <- paste(resultDir, "pdfFigure/", sep = "")
  jpgDir <- paste(resultDir, "jpgFigure/", sep = "")
  if(!file.exists(dataDir)) {
    dir.create(dataDir, showWarning = FALSE)
  }
  if(!file.exists(pdfDir)) {
    dir.create(pdfDir, showWarning = FALSE)
  }
  if(!file.exists(jpgDir)) {
    dir.create(jpgDir, showWarning = FALSE)
  }
  
  #-print(paste("Count:", subgrpCount))
  for(grp1 in 1:(subgrpCount - 1)) {
    if(sum(pd[, group] == grpMember[grp1]) > 1) {
      for(grp2 in (grp1 + 1):subgrpCount) {
        if(sum(pd[, group] == grpMember[grp2]) > 1) {
          member <- c(grpMember[grp1], grpMember[grp2])
          memberName <- paste(member[1], member[2], sep = "-")
          cat("\nPerforming t test between", member[1], "and", member[2], "...\n")
          flush.console()
          
          idxProbe <- 1:dim(exprs(eset))[1]
          if(snPresent) {
            snT <- snSummary(eset, snThresh, group, member)
            idx1 <- which(snT[,1] >= detectSample)
            idx2 <- which(snT[,2] >= detectSample)
            idxProbe <- union(idx1, idx2)
          }      
          probeCount <- length(idxProbe)
          esetUse <- eset[idxProbe,]
          colMember1 <- which(pData(esetUse)[, group] == member[1])
          colMember2 <- which(pData(esetUse)[, group] == member[2])
          sd1 <- sd(t(exprs(esetUse[, colMember1])), na.rm = TRUE)
          sd2 <- sd(t(exprs(esetUse[, colMember2])), na.rm = TRUE)
          rowSd1 <- union(which(is.na(sd1)), which(sd1 == 0))
          rowSd2 <- union(which(is.na(sd2)), which(sd2 == 0))
          rowSd <- union(rowSd1, rowSd2)
          if(length(rowSd) > 0) {
            cat(length(rowSd), "probes had sdev = 0, they were removed in the t test\n")
            print(featureNames(esetUse)[rowSd])
            esetUse <- esetUse[-rowSd,]
            idxProbe <- idxProbe[-rowSd]
            probeCount <- length(idxProbe)        
          }
           #- Calculate fold change and t test using simpleaffy
           #-fct <- get.fold.change.and.t.test(esetUse, group, member, a.order=order1, b.order=order2)
          tRes <- apply(exprs(esetUse), 1, function(x) {
            if(paired) {
              t.test(x[colMember1[order1]], x[colMember2[order2]], na.rm = TRUE, paired = TRUE)$p.value
            }
            else {
              t.test(x[colMember1], x[colMember2], na.rm = TRUE)$p.value
            }
          })
          fcRes <- apply(exprs(esetUse), 1, function(x) {
            if(paired) {
              mean(x[colMember1[order1]] - x[colMember2[order2]], na.rm = TRUE)
            }
            else {
              mean(x[colMember1], na.rm = TRUE) - mean(x[colMember2], na.rm = TRUE)
            }
          })
      
       #- Volcano plot
          print("Creating volcano plot ...")
          for(i in seq(along = pValPreset)) {
            fileName <- paste("ST_", filePrefix, "VolcanoPlot_p", sub("0.", "", pValPreset[i]), memberName, sep = "")
            pdf(file = paste(pdfDir, fileName, ".pdf", sep = ""), width = kWidth/100, height = kHeight/100)
            dev.control("enable")
            idxPval <- which(tRes <= pValPreset[i])
            idxFC <- which(abs(fcRes) >= 1)
            idxUse <- intersect(idxPval, idxFC)
            plot(fcRes[-idxUse], -log10(tRes[-idxUse]), yaxt = "n", pch = 16, col = "blue",
                 xlab = paste("log2 of FC", memberName), ylab = "p values",
                 main = paste("Volcano Plot ", memberName))
      
            points(fcRes[idxUse], -log10(tRes[idxUse]), col = "red", pch = 16)
            axis(2, 1:5, labels = c(0.1, 0.01, 0.001, 0.0001, 0.00001))
            abline(h = -log10(pValPreset[i]), lty = 2)
            abline(v = c(-1, 1), col = "blue", lty = 2)
            savejpg(paste(jpgDir, fileName, sep = ""), width = kWidth, height = kHeight)
            dev.off()
          }
       
       #- Apply FDR
          result.adj <- mt.rawp2adjp(tRes, "BH")
          adjpFDR <- result.adj$adjp[order(result.adj$index),]

                                        #- write fc, tt, FDR to csv file
          fcBinInfo <- vector(mode = "character", length = probeCount)
          for(bin in seq(along = fcBin)) {
            idxBin <- intersect(which(abs(fcRes) > log2(fcBin[[bin]][1])),
                                which(abs(fcRes) <= log2(fcBin[[bin]][2])))
            fcBinInfo[idxBin] <- names(fcBin)[bin]
          }

          output <- cbind("ProbeID" = featureNames(esetUse),  "FC" = 2 ^ fcRes, "Log2(FC)" = fcRes,
                          "p value" = adjpFDR[,1], "FDR (BH)" = adjpFDR[,2], "FC Bin" = fcBinInfo)
          colnames(output) = c("ProbeID", paste("FC (", member[1], "/", member[2], ")", sep = ""),
                    "Log2(FC)", "p value", "FDR (BH)", "FC Bin")
          outName <- paste(dataDir, "ST_", filePrefix, "FCpvalFDR_", memberName, ".csv", sep = "")
          print(paste("Writing t test result to file: ", outName))
          flush.console()
          write.table(output, file = outName, sep = ",", row.names = FALSE, col.names = TRUE)
          output <- NULL
       
       #- Preset an interval to check how many probes pass FDR at specified level
          fdp <- data.frame(preset = fdpPreset, name = fdpName, result = fdpResult)
          cat("\nThe t test was performed if the probe shows S/N >= ", snThresh, " in ",
              detectSample *100, "% or more samples\n",
              "    in either ", member[1], " or ", member[2], "\n", sep = "")
          cat("The number of probes after filtering is:", probeCount, "\n\n")
          flush.console()

          for(i in 1:dim(fdp)[1]) {
            idxProbe <- c()
            fdr <- paste(fdp$name[i], fdp$preset[i]) # what and which level
            if(fdp$name[i] == "pVal") {
              pvCount <- sum(adjpFDR[,1] <= fdp$preset[i], na.rm = TRUE)
              fdp$result[i] <- pvCount
              idxProbe <- which(adjpFDR[,1] <= fdp$preset[i])
              cat("p=", fdp$preset[i], ", significant probes: ", pvCount, "\n", sep = "")
            }
            else {
              fdrCount <- sum(adjpFDR[,2] <= fdp$preset[i], na.rm = TRUE)
              fdp$result[i] <- fdrCount
              idxProbe <- which(adjpFDR[,2] <= fdp$preset[i])
              cat("FDR=", fdp$preset[i], ", significant probes: ", fdrCount, "\n", sep = "")
            }
         
       #- Make plots for no FDR (p = 0.01, and 0.05) first,
       #- then, make plots after FDR adjustment
            if(length(idxProbe) > 1) {
                 #fctUse <- fct[idxProbe,]
              tResUse <- tRes[idxProbe]
              fcResUse <- fcRes[idxProbe]
              rowMean <- rowMeans(exprs(esetUse[idxProbe,]), na.rm = TRUE)
                                        #- Divide fc into 5 bins:
                                        #- fc 1 - 1.2;  1.2 - 1.6;  1.6 - 2.0;  2.0 - 4.0; > 4.0
              idxBinProbe <- list()
              for(bin in seq(along = fcBin)) {
                theIdx <- intersect(which(abs(fcResUse) > log2(fcBin[[bin]][1])),
                  which(abs(fcResUse) <= log2(fcBin[[bin]][2])))
                idxBinProbe[[bin]] <- theIdx
              }
                                        #- Create Fold Change plot
              print(paste("Creating Fold Change plot for:", memberName, fdr, " ..."))
              flush.console()
              fileName <- paste("ST_", filePrefix, "fct_", memberName, "_", sub(" 0.", "", fdr), sep = "")
              pdf(file = paste(pdfDir, fileName, ".pdf", sep = ""), width = kWidth/100, height = kHeight/100)
              dev.control("enable")
              plot(min(rowMean), 0, type = "n", xlim = c(min(rowMean), max(rowMean)), ylim = c(-4, 4),
                   xlab = "Average of Log2 Signal", ylab = "Fold Change (Log2)",
                   main = paste("Average FC ", memberName, " (", length(idxProbe), " probes) ",
                     fdr, sep = ""))
              legendText <- rep("", length(fcBin))
              for(bin in seq(along = fcBin)) {
                points(rowMean[idxBinProbe[[bin]]], fcResUse[idxBinProbe[[bin]]],
                       col = binColor[bin], pch = 21, bg = binColor[bin])
                legendText[bin] = paste(names(fcBin)[bin], ", ", length(idxBinProbe[[bin]]), "    ", sep = "")
              }
              legend("topright", legendText, col = binColor, pt.bg = binColor, pch = 21, pt.cex = 2)
          
              savejpg(paste(jpgDir, fileName, sep = ""), width = kWidth, height = kHeight)
              dev.off()

                                        #- Create cluster
              if(length(idxProbe) < probeLimit & length(idxProbe) > 2) { 
                exps <- exprs(esetUse[idxProbe,])
                dist <- c("Euclidean")
                for(step in 1:length(dist)) {
                  print(paste("Creating cluster heatmap for ", fdr, " using ", dist[step], " ...", sep = ""))
                  flush.console()
                  filename <- paste("ST_", filePrefix, "hcluster_", memberName, "_", sub(" 0.", "", fdr),
                                    dist[step], sep = "")
                  pdf(file = paste(pdfDir, filename, ".pdf", sep = ""), width = kWidth/100, height = kWidth/100)
                  dev.control("enable")
                  hv <- hclusterPlot(exps, paste("Cluster ", fdr, group), dist = dist[step])
                  savejpg(paste(jpgDir, filename, sep = ""), width = kWidth, height = kWidth)
                  dev.off()
              
              #- Export the list of probes
                  fname = paste(dataDir, filename, "ProbeList.csv", sep = "")
                  print(paste("Exporting lists to", fname))
                  write.csv(exps[rev(hv$rowInd), hv$colInd], file = fname)
                }
              }
              else {
                print(paste("Clustering was not performed, because there are", length(idxProbe), "probes."))
                print(paste("    The limit of probe counts for clustering is:", probeLimit))
              }
            }
          }
        }
      }
    }
  }
}

##############################################################
#- $Log: doPlotFCT.R,v $
#- Revision 1.1.1.1  2006/06/06 22:06:37  sunya
#- ABarray project converted from ab1700 project
#-
#- Revision 1.8  2006/03/21 23:00:17  sunya
#- Improved memory usage.
#- Control probes in the exported control signal value file are now sorted.
#- Improved handling of NA values. The statistical analysis will ignore NA.
#- If there is only one member of a subgroup, this subgroup will not appear
#- in t test.
#-
#- Revision 1.7  2006/03/20 23:00:16  sunya
#- Missing values (NA) are kept even after imputation. But most downstream
#- analysis will remove these NA values.
#- The hierarchical clustering probe list and expression values are now
#- exported into csv file.
#- Changed file naming convention. QC plot will begin QC_, and statistical
#- analysis plot will begin ST_ in the figures.
#- Values for control probes are saved into csv file.
#-
#- Revision 1.6  2006/03/14 19:48:30  sunya
#- Changed icp (internal control probe) QC plots.
#- Added function for icp -> icpPlot
#- ANOVA analysis now performs probe filtering, but no FDR is calculated.
#- hclusterPlot now calculate correlation coefficient for probes, previously
#- it used Euclidian distance. The distance between arrays is still Euclidean.
#-

Any scripts or data that you put into this service are public.

ABarray documentation built on Nov. 8, 2020, 5:25 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ABarray
Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.

R/doPlotFCT.R
In ABarray: Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.

Try the ABarray package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ABarray Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.

R/doPlotFCT.R In ABarray: Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.

Try the ABarray package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ABarray
Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.

R/doPlotFCT.R
In ABarray: Microarray QA and statistical data analysis for Applied Biosystems Genome Survey Microrarray (AB1700) gene expression data.