inst/doc/iCheck.R

### R code from vignette source 'iCheck.Rnw'

###################################################
### code chunk number 1: iCheck.Rnw:53-126
###################################################
  library(iCheck)

  if (!interactive())
  {
    options(rgl.useNULL = TRUE)
  }

  # generate sample probe data 
  set.seed(1234567)
  es.sim = genSimData.BayesNormal(nCpGs = 110, 
    nCases = 20, nControls = 20,
    mu.n = -2, mu.c = 2,
    d0 = 20, s02 = 0.64, s02.c = 1.5, testPara = "var",
    outlierFlag = FALSE, 
    eps = 1.0e-3, applier = lapply) 
  print(es.sim)

  # create replicates
  dat=exprs(es.sim)
  dat[,1]=dat[,2]
  dat[,3]=dat[,4]
  

  exprs(es.sim)=dat
  es.sim$arrayID=as.character(es.sim$arrayID)
  es.sim$arrayID[1]=es.sim$arrayID[2]
  es.sim$arrayID[3]=es.sim$arrayID[4]
  es.sim$arrayID[5:8]="Hela"

  # since simulated data set does not have 'Pass_Fail',
  #  'Tissue_Descr', 'Batch_Run_Date', 'Chip_Barcode',
  #  'Chip_Address', 'Hybridization_Name', 'Subject_ID', 'gender'
  # we generate them now to illustrate the R functions in the package 

  es.sim$Hybridization_Name = paste(es.sim$arrayID, 1:ncol(es.sim), sep="_")
  # assume the first 4 arrays are genetic control samples
  es.sim$Subject_ID = es.sim$arrayID

  es.sim$Pass_Fail = rep("pass", ncol(es.sim))

  # produce genetic control GC samples
  es.sim$Tissue_Descr=rep("CD4", ncol(es.sim))
  # assume the first 4 arrays are genetic control samples
  es.sim$Tissue_Descr[5:8]="Human Hela Cell"

  es.sim$Batch_Run_Date = 1:ncol(es.sim)
  es.sim$Chip_Barcode = 1:ncol(es.sim)
  es.sim$Chip_Address = 1:ncol(es.sim)

  es.sim$gender=rep(1, ncol(es.sim))
  set.seed(12345)
  pos=sample(x=1:ncol(es.sim), size=ceiling(ncol(es.sim)/2), replace=FALSE) 
  es.sim$gender[pos]=0


  # generate sample probe data 
  es.raw = es.sim[-c(1:10),]
  print(es.raw)
  
  # generate QC probe data 
  es.QC = es.sim[c(1:10),]

  # since simulated data set does not have 'Reporter_Group_Name'
  #  we created it now to illustrate the usage of 'plotQCCurves'.
  fDat=fData(es.QC)
  fDat$Reporter_Group_Name=rep("biotin", 10)
  fDat$Reporter_Group_Name[3:4]="cy3_hyb"
  fDat$Reporter_Group_Name[5:6]="housekeeping"
  fDat$Reporter_Group_Name[7:8]="low_stringency_hyb"
  fData(es.QC)=fDat

  print(es.QC)
  


###################################################
### code chunk number 2: iCheck.Rnw:137-138
###################################################
print(table(es.raw$Pass_Fail, useNA="ifany"))


###################################################
### code chunk number 3: iCheck.Rnw:143-149
###################################################
pos<-which(es.raw$Pass_Fail != "pass")
if(length(pos))
{
  es.raw<-es.raw[, -pos]
  es.QC<-es.QC[, -pos]
}


###################################################
### code chunk number 4: iCheck.Rnw:162-185
###################################################

     plotQCCurves(
         esQC=es.QC,
         probes = c("biotin"), #"cy3_hyb", "housekeeping"),
           #"low_stringency_hyb"),
         labelVariable = "subjID",
         hybName = "Hybridization_Name",
         reporterGroupName = "Reporter_Group_Name",
         requireLog2 = FALSE,
         projectName = "test",
         plotOutPutFlag = FALSE,
         cex = 1,
         ylim = NULL,
         xlab = "",
         ylab = "log2(intensity)",
         lwd = 3,
         mar = c(10, 4, 4, 2) + 0.1,
         las = 2,
         cex.axis = 1,
         sortFlag = TRUE,
         varSort = c("Batch_Run_Date", "Chip_Barcode", "Chip_Address"),
         timeFormat = c("%m/%d/%Y", NA, NA)
       )  


###################################################
### code chunk number 5: iCheck.Rnw:225-245
###################################################

     R2PlotFunc(
         es=es.raw,
         hybName = "Hybridization_Name",
         arrayType = "GC",
         GCid = c("128115", "Hela", "Brain"),
         probs = seq(0, 1, 0.25),
         col = gplots::greenred(75),
         labelVariable = "subjID",
         outFileName = "test_R2_raw.pdf",
         title = "Raw Data R^2 Plot",
         requireLog2 = FALSE,
         plotOutPutFlag = FALSE,
         las = 2,
         keysize = 1,
         margins = c(10, 10),
         sortFlag = TRUE,
         varSort=c("Batch_Run_Date", "Chip_Barcode", "Chip_Address"),
         timeFormat=c("%m/%d/%Y", NA, NA)
       )  


###################################################
### code chunk number 6: iCheck.Rnw:254-268
###################################################
print(table(es.raw$Tissue_Descr, useNA="ifany"))

# for different data sets, the label for GC arrays might
# be different.
pos.del<-which(es.raw$Tissue_Descr == "Human Hela Cell")
cat("No. of GC arrays=", length(pos.del), "\n")

if(length(pos.del))
{
  es.raw<-es.raw[,-pos.del]
  es.QC<-es.QC[,-pos.del]
  print(dims(es.raw))
  print(dims(es.QC))
}


###################################################
### code chunk number 7: iCheck.Rnw:276-296
###################################################

     R2PlotFunc(
         es=es.raw,
         arrayType = c("replicates"),
         GCid = c("128115", "Hela", "Brain"),
         probs = seq(0, 1, 0.25),
         col = gplots::greenred(75),
         labelVariable = "subjID",
         outFileName = "test_R2_raw.pdf",
         title = "Raw Data R^2 Plot",
         requireLog2 = FALSE,
         plotOutPutFlag = FALSE,
         las = 2,
         keysize = 1,
         margins = c(10, 10),
         sortFlag = TRUE,
         varSort=c("Subject_ID", "Hybridization_Name", "Batch_Run_Date", "Chip_Barcode", "Chip_Address"),
         timeFormat=c(NA, NA, "%m/%d/%Y", NA, NA)
       )  



###################################################
### code chunk number 8: iCheck.Rnw:312-321
###################################################
    densityPlots(
      es = es.raw, 
      requireLog2 = FALSE,
      myxlab = "expression level", 
      datExtrFunc = exprs, 
      fileFlag = FALSE, 
      fileFormat = "ps", 
      fileName = "densityPlots_sim.ps")



###################################################
### code chunk number 9: iCheck.Rnw:347-364
###################################################
     quantilePlot(
         dat=exprs(es.raw),
         fileName,
         probs = c(0, 0.05, 0.25, 0.5, 0.75, 0.95, 1),
         plotOutPutFlag = FALSE,
         requireLog2 = FALSE,
         sortFlag = TRUE,
         cex = 1,
         ylim = NULL,
         xlab = "",
         ylab = "log2(intensity)",
         lwd = 3,
         main = "Trajectory plot of quantiles\n(sample arrays)",
         mar = c(15, 4, 4, 2) + 0.1,
         las = 2,
         cex.axis = 1
         )


###################################################
### code chunk number 10: iCheck.Rnw:374-392
###################################################
# note we need to take log2 transformation
# if requireLog2 = TRUE.
requireLog2 = FALSE
if(requireLog2)
{  
  minVec<-apply(log2(exprs(es.raw)), 1, min, na.rm=TRUE)
  # suppose the cutoff is 0.5
  print(sum(minVec< 0.5))
  pos.del<-which(minVec<0.5)

  cat("Number of gene probes with outlying expression levels>>",
  length(pos.del), "\n")
  if(length(pos.del))
  {
    es.raw<-es.raw[-pos.del,]
  }
}



###################################################
### code chunk number 11: iCheck.Rnw:407-432
###################################################

     plotSamplep95p05(
         es=es.raw,
         labelVariable = "memSubj",
         requireLog2 = FALSE,
         projectName = "test",
         plotOutPutFlag = FALSE,
         cex = 1,
         ylim = NULL,
         xlab = "",
         ylab = "",
         lwd = 1.5,
         mar = c(10, 4, 4, 2) + 0.1,
         las = 2,
         cex.axis=1.5,
         title = "Trajectory of p95/p05",
         cex.legend = 1.5,
         cex.lab = 1.5,
         legendPosition = "topright",
         cut1 = 10,
         cut2 = 6,
         sortFlag = TRUE,
         varSort = c("Batch_Run_Date", "Chip_Barcode", "Chip_Address"),
         timeFormat = c("%m/%d/%Y", NA, NA),
         verbose = FALSE)


###################################################
### code chunk number 12: iCheck.Rnw:441-454
###################################################
p95<-quantile(exprs(es.raw), prob=0.95)
p05<-quantile(exprs(es.raw), prob=0.05)

r<-p95/p05
pos.del<-which(r<6)
print(pos.del)

if(length(pos.del))
{
  es.raw<-es.raw[,-pos.del]
  es.QC<-es.QC[,-pos.del]
}



###################################################
### code chunk number 13: iCheck.Rnw:471-480
###################################################

     pcaObj<-getPCAFunc(es=es.raw,
                labelVariable = "subjID",
                requireLog2 = FALSE,
                corFlag = FALSE
              
               
     )



###################################################
### code chunk number 14: iCheck.Rnw:487-508
###################################################

     pca2DPlot(pcaObj=pcaObj,
               plot.dim = c(1,2),
               labelVariable = "memSubj",
               outFileName = "test_pca_raw.pdf",
               title = "Scatter plot of pcas (memSubj)",
               plotOutPutFlag = FALSE,
               mar = c(5, 4, 4, 2) + 0.1,
               lwd = 1.5,
               equalRange = TRUE,
               xlab = NULL,
               ylab = NULL,
               xlim = NULL,
               ylim = NULL,
               cex.legend = 1.5,
               cex = 1.5,
               cex.lab = 1.5,
               cex.axis = 1.5,
               legendPosition = "topright"
             
               )


###################################################
### code chunk number 15: iCheck.Rnw:517-519
###################################################
tt <- es.raw
es.q<-lumiN(tt, method="quantile")


###################################################
### code chunk number 16: iCheck.Rnw:532-560
###################################################
     pcaObj<-getPCAFunc(es=es.q,
                labelVariable = "subjID",
                requireLog2 = FALSE,
                corFlag = FALSE
                
     )


     pca2DPlot(pcaObj=pcaObj,
               plot.dim = c(1,2),
               labelVariable = "memSubj",
               outFileName = "test_pca_raw.pdf",
               title = "Scatter plot of pcas (memSubj)\n(log2 transformed and quantile normalized)",
               plotOutPutFlag = FALSE,
               mar = c(5, 4, 4, 2) + 0.1,
               lwd = 1.5,
               equalRange = TRUE,
               xlab = NULL,
               ylab = NULL,
               xlim = NULL,
               ylim = NULL,
               cex.legend = 1.5,
               cex = 1.5,
               cex.lab = 1.5,
               cex.axis = 1.5,
               legendPosition = "topright"
               )



###################################################
### code chunk number 17: iCheck.Rnw:588-600
###################################################

res.limma=lmFitWrapper(
  es=es.q, 
  formula=~as.factor(memSubj), 
  pos.var.interest = 1,
  pvalAdjMethod="fdr", 
  alpha=0.05, 
  probeID.var="probe", 
  gene.var="gene", 
  chr.var="chr", 
  verbose=TRUE)



###################################################
### code chunk number 18: iCheck.Rnw:610-626
###################################################

res.glm=glmWrapper(
  es=es.q,
  formula = xi~as.factor(memSubj), 
  pos.var.interest = 1,
  family=gaussian,
  logit=FALSE, 
  pvalAdjMethod="fdr", 
  alpha = 0.05, 
  probeID.var = "probe", 
  gene.var = "gene", 
  chr.var = "chr", 
  applier=lapply,
  verbose=TRUE 
  )



###################################################
### code chunk number 19: iCheck.Rnw:634-650
###################################################

res.lkh=lkhrWrapper(
  es=es.q,
  formulaReduced = xi~as.factor(memSubj), 
  formulaFull = xi~as.factor(memSubj)+gender, 
  family=gaussian,
  logit=FALSE, 
  pvalAdjMethod="fdr", 
  alpha = 0.05, 
  probeID.var = "probe", 
  gene.var = "gene", 
  chr.var = "chr", 
  applier=lapply,
  verbose=TRUE 
  )



###################################################
### code chunk number 20: iCheck.Rnw:665-681
###################################################

    boxPlots(
      resFrame = res.limma$frame, 
      es = es.sim, 
      col.resFrame = c("probeIDs", "stats", "pval", "p.adj"), 
      var.pheno = "memSubj", 
      var.probe = "probe", 
      var.gene = "gene", 
      var.chr = "chr", 
      nTop = 20, 
      myylab = "expression level", 
      datExtrFunc = exprs, 
      fileFlag = FALSE, 
      fileFormat = "ps", 
      fileName = "boxPlots_sim.ps")



###################################################
### code chunk number 21: iCheck.Rnw:690-707
###################################################

    # regard memSubj as continuos for illustration purpose
    scatterPlots(
      resFrame = res.limma$frame, 
      es = es.sim, 
      col.resFrame = c("probeIDs", "stats", "pval", "p.adj"), 
      var.pheno = "memSubj", 
      var.probe = "probe", 
      var.gene = "gene", 
      var.chr = "chr", 
      nTop = 20, 
      myylab = "expression level", 
      datExtrFunc = exprs, 
      fileFlag = FALSE, 
      fileFormat = "ps", 
      fileName = "scatterPlots_sim.ps")



###################################################
### code chunk number 22: sessionInfo
###################################################
toLatex(sessionInfo())

Try the iCheck package in your browser

Any scripts or data that you put into this service are public.

iCheck documentation built on Nov. 8, 2020, 11:09 p.m.