R/QCreport.R

Defines functions data2report

##################################################################################
##############      Quality Control Report on Expression Data       ##############
##################################################################################


## By Sonia Tarazona
## 25-June-2013
## Modified: 30-March-2015


### Generating data for QC report
##################################


data2report = function(input, factor = NULL, norm = FALSE) {  
    
  ## Biotype detection
  if (!is.null(featureData(input)$Biotype)) {  # BIOTYPES
    mybiotdet = biodetection.dat(input, factor = factor, k = 0); biot.avail = TRUE
    mycountsbio1 = countsbio.dat(input, factor = factor, norm = norm)
  } else {  # NO biotypes
    mybiotdet = NULL; mycountsbio1 = NULL; biot.avail = FALSE
  }
  
  ## Sequencing depth & Expression quantification
  mysat = saturation.dat(input, k = 0, ndepth = 6)
  mycountsbio2 = countsbio.dat(input, factor = factor, norm = norm)
  
  ## Bias detection
  if (!is.null(featureData(input)$Length)) {  # LENGTH
    mylength = length.dat(input, factor = factor, norm = norm); length.avail = TRUE
  } else { mylength = NULL; length.avail = FALSE }
  
  if (!is.null(featureData(input)$GC)) {  # GC
    myGC = GC.dat(input, factor = factor, norm = norm); GC.avail = TRUE
  } else { myGC = NULL; GC.avail = FALSE }
  
  myCD = cd.dat(input, norm = norm, refColumn = 1)
  
  
  ## PCA
  myPCA = PCA.dat(input, norm = norm, logtransf = FALSE)
  
  
  list("data" = list("biodet" = mybiotdet, "countsbiot" = mycountsbio1, "saturation" = mysat, 
                     "countsampl" = mycountsbio2, "length" = mylength, "GC" = myGC, "countdist" = myCD, "PCA" = myPCA),
       "parameters" = list("biotypes" = biot.avail, "length" = length.avail, "GC" = GC.avail))
  
}





##################################################################################


### Generating QC report
##################################



QCreport = function (input, file = NULL, samples = NULL, factor = NULL, norm = FALSE) {

  if (is.null(file))
	file <- paste("QCreport", format(Sys.time(), "_%Y%b%d_%H_%M_%S"), ".pdf", sep = "")  

  QCinfo = data2report(input = input, factor = factor, norm = norm)
  
  samples2 = colnames(QCinfo$data$countdist$data2plot)
    
  
  # NO factor
  if (is.null(factor)) {    
    if (length(samples) != 2) {
      stop("ERROR: Factor was not specified and the number of samples to be plotted is not equal to 2.\n
           Please, either indicate the factor or the two samples to be plotted.\n")
    } else { 
      niveles = NULL 
      if (is.numeric(samples)) { samples = colnames(QCinfo$data$countdist$data2plot)[samples] }
    }
    
    
  # FACTOR  
  } else {    
    myfactor = as.factor(pData(input)[,factor])
    niveles = as.character(unique(myfactor))
    
    if (length(niveles) > 2) {  # more than two levels
      if (length(samples) != 2) {
        stop("ERROR: The factor has more than two levels (conditions).\n
           Please, specify which two conditions are to be plotted.\n")
      } else {
        if (is.numeric(samples)) { samples = colnames(QCinfo$data$countsampl$result)[samples] }
        niveles = samples
      }
    } 
    
    if (length(niveles) == 2) {  # 2 samples
      samples = niveles
    }
  }  
  
    
  
  pdf(file, paper = "a4", width = 8.27, height = 11.69)
  
  
  # Page 0
  layout(matrix(c(1,2,3), nrow = 3, ncol = 1, byrow = TRUE), heights = c(25,15,60))
  
  par(mar = c(0,0,0,0))
  ## TITLE
  plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
  text(5,4, "Quality Control of Expression Data", adj = 0.5, cex = 3, col = "brown3", font = 2)
  text(5,1, paste("Generated by NOISeq on", format(Sys.time(), "%d %b %Y, %H:%M:%S")), 
       adj = 0.5, font = 3, cex = 1.5)
  
  ## SUBTITLE
  plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
  text(2,3, "Content", adj = 0.5, font = 2, cex = 2, col = "dodgerblue4")
  
  ## Content
  par(mar = c(0,3,0,3))
  lugares = c(1,3)
  plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
  
  text(lugares[1], 10, "Plot", adj = 0, font = 3, cex = 1)
  text(lugares[2], 10, "Description", adj = 0, font = 3, cex = 1)
  abline(h = 9.8, lty = 2, col = "grey")
  
  empiezo = 9.3
  bajo = 0.5
  text(lugares[1], empiezo, "Biotype detection", adj = 0, font = 2, cex = 1)
  if (QCinfo$parameters$biotypes) {
    text(lugares[2], empiezo, "Biotype abundance in the genome with %genes detected (counts > 0) in the sample/condition.", 
         adj = 0, font = 1, cex = 1)
    text(lugares[2], empiezo-bajo/2, "Biotype abundance within the sample/condition.", 
         adj = 0, font = 1, cex = 1)
  } else {
    text(lugares[2], empiezo, "Plot not available. Biotypes information was not provided.", 
         adj = 0, font = 1, cex = 1)
  }  
      
  empiezo = empiezo-bajo-0.3
  text(lugares[1], empiezo, "Biotype expression", adj = 0, font = 2, cex = 1)
  if (QCinfo$parameters$biotypes) {
    text(lugares[2], empiezo, "Distribution of gene counts per million per biotype in sample/condition (only genes with counts > 0).", 
         adj = 0, font = 1, cex = 1)
  } else {
    text(lugares[2], empiezo, "Plot not available. Biotypes information was not provided.", 
         adj = 0, font = 1, cex = 1)
  }    
                     
  empiezo = empiezo-bajo
  text(lugares[1], empiezo, "Saturation", adj = 0, font = 2, cex = 1)
  text(lugares[2], empiezo, "Number of detected genes (counts > 0) per sample across different sequencing depths", adj = 0, font = 1, cex = 1)
  
  empiezo = empiezo-bajo
  text(lugares[1], empiezo, "Expression boxplot", adj = 0, font = 2, cex = 1)
  text(lugares[2], empiezo, "Distribution of gene counts per million (all biotypes) in each sample/condition", adj = 0, font = 1, cex = 1)
  
  empiezo = empiezo-bajo
  text(lugares[1], empiezo, "Expression barplot", adj = 0, font = 2, cex = 1)
  text(lugares[2], empiezo, "Percentage of genes with >0, >1, >2, >5 or >10 counts per million in each sample/condition.", adj = 0, font = 1, cex = 1)
  
  empiezo = empiezo-bajo
  text(lugares[1], empiezo, "Length bias", adj = 0, font = 2, cex = 1)
  if (QCinfo$parameters$length) {
    text(lugares[2], empiezo, "Mean gene expression per each length bin. Fitted curve and diagnostic test.", 
         adj = 0, font = 1, cex = 1)
  } else {
    text(lugares[2], empiezo, "Plot not available. Gene length was not provided.", 
         adj = 0, font = 1, cex = 1)
  }
  
  empiezo = empiezo-bajo
  text(lugares[1], empiezo, "GC content bias", adj = 0, font = 2, cex = 1)
  if (QCinfo$parameters$GC) {
    text(lugares[2], empiezo, "Mean gene expression per each GC content bin. Fitted curve and diagnostic test.", 
         adj = 0, font = 1, cex = 1)
  } else {
    text(lugares[2], empiezo, "Plot not available. Gene GC content was not provided.", 
         adj = 0, font = 1, cex = 1)
  }
  
  empiezo = empiezo-bajo
  text(lugares[1], empiezo, "RNA composition bias", adj = 0, font = 2, cex = 1)
  text(lugares[2], empiezo, "Density plots of log fold changes (M) between pairs of samples.", adj = 0, font = 1, cex = 1)
  text(lugares[2], empiezo-bajo/2, "Confidence intervals for the median of M values.", adj = 0, font = 1, cex = 1)

  empiezo = empiezo-bajo-0.3
  text(lugares[1], empiezo, "Exploratory PCA", adj = 0, font = 2, cex = 1)
  text(lugares[2], empiezo, "Principal Component Analysis score plots for PC1 vs PC2, and PC1 vs PC3.", adj = 0, font = 1, cex = 1)

  
  
  
  # Page 1 (only if biotypes are provided)
  if (QCinfo$parameters$biotypes) {
    layout(matrix(c(1,1,2,3,4,5), nrow = 3, ncol = 2, byrow = TRUE), heights = c(10,45,45))
        
    ## SUBTITLE
    par(mar = c(0,0,0,0))
    plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
    text(5,6, "Biotype detection", adj = 0.5, font = 2, cex = 2, col = "dodgerblue4")
    
    ## BIODETECTION PLOTS
    biodetection.plot(QCinfo$data$biodet, samples = samples, plottype = "comparison", toreport = TRUE)
    countsbio.plot(QCinfo$data$countsbiot, toplot = "global", samples = samples[1], plottype = "boxplot",
                   ylim = range(log2(1+QCinfo$data$countsbiot$result)), toreport = TRUE)  
    countsbio.plot(QCinfo$data$countsbiot, toplot = "global", samples = samples[2], plottype = "boxplot",
                   ylim = range(log2(1+QCinfo$data$countsbiot$result)), toreport = TRUE)  
  }
  
  
  
  # Page 2
  #layout(matrix(c(1,2,3,8,4,9,1,5,6,8,7,9), nrow = 6, ncol = 2, byrow = FALSE), heights = c(10,35,10,5,35,5))
  layout(matrix(c(1,2,3,8,4,1,5,6,8,7), nrow = 5, ncol = 2, byrow = FALSE), heights = c(10,35,10,5,40))
  
  par(mar = c(0,0,0,0))
  ## SUBTITLE
  plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
  text(5,5, "Sequencing depth & Expression quantification", adj = 0.5, font = 2, cex = 2, col = "dodgerblue4")
  
  ## SEQUENCING DEPTH AND EXPRESSION QUANTIFICATION PLOTS 
  if (is.null(niveles)) {  # NO factor
    saturation.plot(QCinfo$data$saturation, samples = samples, toplot = 1, 
                    yleftlim = c(0,unlist(QCinfo$data$saturation$bionum[1])), toreport = TRUE)    
  } else {   # FACTOR
    par(mar = c(5.1,4.1,4.1,2.1))
    saturation.plot(QCinfo$data$saturation, samples = samples2[myfactor == niveles[1]], 
                    toplot = 1, toreport = TRUE, 
                    yleftlim = c(0,unlist(QCinfo$data$saturation$bionum[1])), )
    if (sum(myfactor == niveles[1]) > 2) {
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
    }    
  } 
  
  par(mar = c(0,0,0,0))
  countsbio.plot(QCinfo$data$countsampl, toplot = "global", samples = samples, plottype = "boxplot",
                 toreport = TRUE)
  
  if (is.null(niveles)) {  # NO factor
    par(mar = c(0,0,0,0))
    plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
    plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
    
  } else {  # FACTOR
    saturation.plot(QCinfo$data$saturation, samples = samples2[myfactor == niveles[2]], 
                    toplot = 1, yleftlim = c(0,unlist(QCinfo$data$saturation$bionum[1])), toreport = TRUE)
    if (sum(myfactor == niveles[2]) > 2) {
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
    }  
  }  
  
  countsbio.plot(QCinfo$data$countsampl, toplot = "global", samples = samples, plottype = "barplot",
                 toreport = TRUE)
                 
  
  
  
  #####  BIAS DETECTION
  QQ = 0.05
  
  # Page 3
  layout(matrix(c(1,1,2,2,3,4,5,5,6,7), nrow = 5, ncol = 2, byrow = TRUE), heights = c(10,10,35,10,35))
  
  par(mar = c(0,0,0,0))
  plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
  text(5,5, "Sequencing bias detection", adj = 0.5, font = 2, cex = 2, col = "dodgerblue4")
  
  if (QCinfo$parameters$length) {   ## LENGTH
    
    if (QCinfo$parameters$GC) {  ## LENGTH & GC      
      
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for feature length bias", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      laF = lapply(QCinfo$data$length$RegressionModels[samples],
                   function (x) summary(x)$"fstatistic")
      pvalores = sapply(laF, function (x) pf(x[1], df1 = x[2], df2 = x[3], lower.tail = FALSE))
      misR2 = sapply(QCinfo$data$length$RegressionModels[samples],
                     function (x) summary(x)$"r.squared")
      
      if (min(pvalores) < QQ) { 
        if (max(misR2) > 0.7) {
          text(5,5, "FAILED. At least one of the model p-values was lower than 0.05 and R2 > 70%.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting length bias is recommended.", adj = 0.5, font = 1, cex = 1)
        } else {
          text(5,5, "WARNING. At least one of the model p-values was lower than 0.05, but R2 < 70% for at least one condition.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting length bias could be advisable.",               
               adj = 0.5, font = 1, cex = 1)
          text(5,2, "Plese check in the plots below the strength of the relationship between length and expression.", 
               adj = 0.5, font = 1, cex = 1)
        }
        
      } else {
        text(5,4, "PASSED. No normalization for correcting length bias is required.", adj = 0.5, font = 1, cex = 1)
      }
      
      
      par(mar = c(5.1,4.1,4.1,2.1))
      
      length.plot(QCinfo$data$length, toreport = TRUE, samples = samples)
      
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for GC content bias", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      laF = lapply(QCinfo$data$GC$RegressionModels[samples],
                   function (x) summary(x)$"fstatistic")
      pvalores = sapply(laF, function (x) pf(x[1], df1 = x[2], df2 = x[3], lower.tail = FALSE))
      misR2 = sapply(QCinfo$data$GC$RegressionModels[samples],
                     function (x) summary(x)$"r.squared")
      
      if (min(pvalores) < QQ) { 
        if (max(misR2) > 0.7) {
          text(5,5, "FAILED. At least one of the model p-values was lower than 0.05 and R2 > 70%.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting GC content bias is recommended.", adj = 0.5, font = 1, cex = 1)
        } else {
          text(5,5, "WARNING. At least one of the model p-values was lower than 0.05, but R2 < 70% for at least one condition.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting GC content bias could be advisable.",               
               adj = 0.5, font = 1, cex = 1)
          text(5,2, "Plese check in the plots below the strength of the relationship between GC content and expression.", 
               adj = 0.5, font = 1, cex = 1)
        }
        
      } else {
        text(5,4, "PASSED. No normalization for correcting GC content bias is required.", adj = 0.5, font = 1, cex = 1)
      }
      
      par(mar = c(5.1,4.1,4.1,2.1))
      GC.plot(QCinfo$data$GC, toreport = TRUE, samples = samples)      
      
      
      # Page 4
      layout(matrix(c(1,1,2,3,4,4), nrow = 3, ncol = 2, byrow = TRUE), heights = c(10,40,45))
      
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for differences in RNA composition", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      if ("FAILED" %in% QCinfo$data$countdist$DiagnosticTest[,"Diagnostic Test"]) {
        text(5,5, "FAILED. There is a pair of samples with significantly different RNA composition", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is required.", adj = 0.5, font = 1, cex = 1)
      } else {
        text(5,5, "PASSED. The pairs of compared samples do not present significant differences in RNA composition.", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is NOT required.", adj = 0.5, font = 1, cex = 1)
      }
      
      
      par(mar = c(5.1,4.1,4.1,2.1))
      if (length(samples2) < 14) {
        cd.plot(QCinfo$data$countdist, samples = samples2) 
      } else {
        cd.plot(QCinfo$data$countdist, samples = setdiff(samples2, QCinfo$data$countdist$refColumn)[1:12]) 
      }
      
      
      par(mar = c(0,0,0,0))
      lugares = c(1,4.5,6.5,8.5)
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(lugares[1],10, "Confidence intervals for median of M values", adj = 0, font = 2, cex = 1.2)
      text(lugares[1], 9.4, "Sample", adj = 0, font = 1, cex = 1)
      abline(h = 9.2, lty = 2, col = "grey")
      for (j in 1:3) {
        text(lugares[j+1] , 9.4, colnames(QCinfo$data$countdist$DiagnosticTest)[j], adj = 0, font = 1, cex = 1)   
        
        for (i in 1:min(30,nrow(QCinfo$data$countdist$DiagnosticTest))) {
          
          if (j == 1) text(lugares[j], 9.2-i*0.3, rownames(QCinfo$data$countdist$DiagnosticTest)[i], 
                         adj = 0, font = 1, cex = 1)
          if (j < 3) text(lugares[j+1], 9.2-i*0.3, adj = 1, font = 1, cex = 1,
                          round(as.numeric(QCinfo$data$countdist$DiagnosticTest[i,j]),4))
          if (j == 3) text(lugares[j+1], 9.2-i*0.3, adj = 0, font = 1, cex = 1,
                           QCinfo$data$countdist$DiagnosticTest[i,j]) 
        }        
      }
      
      if (nrow(QCinfo$data$countdist$DiagnosticTest) > 30) {
        print("WARNING: In Diagnostic Test for RNA composition, the confidence intervals are shown for only 
              the first 30 samples.")
      }
      
      
      
          
      
    } else {  ## LENGTH & NO GC
      
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for feature length bias", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      laF = lapply(QCinfo$data$length$RegressionModels[samples],
                   function (x) summary(x)$"fstatistic")
      pvalores = sapply(laF, function (x) pf(x[1], df1 = x[2], df2 = x[3], lower.tail = FALSE))
      misR2 = sapply(QCinfo$data$length$RegressionModels[samples],
                     function (x) summary(x)$"r.squared")
      
      if (min(pvalores) < QQ) { 
        if (max(misR2) > 0.7) {
          text(5,5, "FAILED. At least one of the model p-values was lower than 0.05 and R2 > 70%.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting length bias is recommended.", adj = 0.5, font = 1, cex = 1)
        } else {
          text(5,5, "WARNING. At least one of the model p-values was lower than 0.05, but R2 < 70% for at least one condition.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting length bias could be advisable.",               
               adj = 0.5, font = 1, cex = 1)
          text(5,2, "Plese check in the plots below the strength of the relationship between length and expression.", 
               adj = 0.5, font = 1, cex = 1)
        }
        
      } else {
        text(5,4, "PASSED. No normalization for correcting length bias is required.", adj = 0.5, font = 1, cex = 1)
      }
      
      
      par(mar = c(5.1,4.1,4.1,2.1))
      
      length.plot(QCinfo$data$length, toreport = TRUE, samples = samples)
      
      
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for differences in RNA composition", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      if ("FAILED" %in% QCinfo$data$countdist$DiagnosticTest[,"Diagnostic Test"]) {
        text(5,5, "FAILED. There is a pair of samples with significantly different RNA composition", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is required.", adj = 0.5, font = 1, cex = 1)
      } else {
        text(5,5, "PASSED. The pairs of compared samples do not present significant differences in RNA composition.", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is NOT required.", adj = 0.5, font = 1, cex = 1)
      }
      
      
      
      par(mar = c(5.1,4.1,4.1,2.1))
      if (length(samples2) < 14) {
        cd.plot(QCinfo$data$countdist, samples = samples2) 
      } else {
        cd.plot(QCinfo$data$countdist, samples = setdiff(samples2, QCinfo$data$countdist$refColumn)[1:12]) 
      }
      
      
      
      par(mar = c(0,0,0,0))
      lugares = c(1,4.5,6.5,8.5)
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(lugares[1],10, "Confidence intervals for median of M values", adj = 0, font = 2, cex = 1.2)
      text(lugares[1], 9.4, "Sample", adj = 0, font = 1, cex = 1)
      abline(h = 9.2, lty = 2, col = "grey")
      for (j in 1:3) {
        text(lugares[j+1] , 9.4, colnames(QCinfo$data$countdist$DiagnosticTest)[j], adj = 0, font = 1, cex = 1)   
        
        for (i in 1:min(30,nrow(QCinfo$data$countdist$DiagnosticTest))) {
          
          if (j == 1) text(lugares[j], 9.2-i*0.3, rownames(QCinfo$data$countdist$DiagnosticTest)[i], 
                           adj = 0, font = 1, cex = 1)
          if (j < 3) text(lugares[j+1], 9.2-i*0.3, adj = 1, font = 1, cex = 1,
                          round(as.numeric(QCinfo$data$countdist$DiagnosticTest[i,j]),4))
          if (j == 3) text(lugares[j+1], 9.2-i*0.3, adj = 0, font = 1, cex = 1,
                           QCinfo$data$countdist$DiagnosticTest[i,j]) 
        }        
      }
      
      if (nrow(QCinfo$data$countdist$DiagnosticTest) > 30) {
        print("WARNING: In Diagnostic Test for RNA composition, the confidence intervals are shown for only 
              the first 30 samples.")
      }
            
 
      
    }    
    
  } else {  ## NO LENGTH
    
    if (QCinfo$parameters$GC) {   ## NO LENGTH & GC
            
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for GC content bias", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      laF = lapply(QCinfo$data$GC$RegressionModels[samples],
                   function (x) summary(x)$"fstatistic")
      pvalores = sapply(laF, function (x) pf(x[1], df1 = x[2], df2 = x[3], lower.tail = FALSE))
      misR2 = sapply(QCinfo$data$GC$RegressionModels[samples],
                     function (x) summary(x)$"r.squared")
      
      if (min(pvalores) < QQ) { 
        if (max(misR2) > 0.7) {
          text(5,5, "FAILED. At least one of the model p-values was lower than 0.05 and R2 > 70%.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting GC content bias is recommended.", adj = 0.5, font = 1, cex = 1)
        } else {
          text(5,5, "WARNING. At least one of the model p-values was lower than 0.05, but R2 < 70% for at least one condition.", 
               adj = 0.5, font = 1, cex = 1)
          text(5,3, "Normalization for correcting GC content bias could be advisable.",               
               adj = 0.5, font = 1, cex = 1)
          text(5,2, "Plese check in the plots below the strength of the relationship between GC content and expression.", 
               adj = 0.5, font = 1, cex = 1)
        }
        
      } else {
        text(5,4, "PASSED. No normalization for correcting GC content bias is required.", adj = 0.5, font = 1, cex = 1)
      }
      
      par(mar = c(5.1,4.1,4.1,2.1))
      GC.plot(QCinfo$data$GC, toreport = TRUE, samples = samples)      
      
            
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for differences in RNA composition", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      if ("FAILED" %in% QCinfo$data$countdist$DiagnosticTest[,"Diagnostic Test"]) {
        text(5,5, "FAILED. There is a pair of samples with significantly different RNA composition", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is required.", adj = 0.5, font = 1, cex = 1)
      } else {
        text(5,5, "PASSED. The pairs of compared samples do not present significant differences in RNA composition.", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is NOT required.", adj = 0.5, font = 1, cex = 1)
      }
      
      
      
      par(mar = c(5.1,4.1,4.1,2.1))
      if (length(samples2) < 14) {
        cd.plot(QCinfo$data$countdist, samples = samples2) 
      } else {
        cd.plot(QCinfo$data$countdist, samples = setdiff(samples2, QCinfo$data$countdist$refColumn)[1:12]) 
      }
      
      
      
      par(mar = c(0,0,0,0))
      lugares = c(1,4.5,6.5,8.5)
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(lugares[1],10, "Confidence intervals for median of M values", adj = 0, font = 2, cex = 1.2)
      text(lugares[1], 9.4, "Sample", adj = 0, font = 1, cex = 1)
      abline(h = 9.2, lty = 2, col = "grey")
      for (j in 1:3) {
        text(lugares[j+1] , 9.4, colnames(QCinfo$data$countdist$DiagnosticTest)[j], adj = 0, font = 1, cex = 1)   
        
        for (i in 1:min(30,nrow(QCinfo$data$countdist$DiagnosticTest))) {
          
          if (j == 1) text(lugares[j], 9.2-i*0.3, rownames(QCinfo$data$countdist$DiagnosticTest)[i], 
                           adj = 0, font = 1, cex = 1)
          if (j < 3) text(lugares[j+1], 9.2-i*0.3, adj = 1, font = 1, cex = 1,
                          round(as.numeric(QCinfo$data$countdist$DiagnosticTest[i,j]),4))
          if (j == 3) text(lugares[j+1], 9.2-i*0.3, adj = 0, font = 1, cex = 1,
                           QCinfo$data$countdist$DiagnosticTest[i,j]) 
        }        
      }
      
      if (nrow(QCinfo$data$countdist$DiagnosticTest) > 30) {
        print("WARNING: In Diagnostic Test for RNA composition, the confidence intervals are shown for only 
              the first 30 samples.")
      }
      
      
    } else {  ## NO LENGTH & NO GC
      
      par(mar = c(0,0,0,0))
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(5,8, "Diagnostic plot for differences in RNA composition", adj = 0.5, font = 4, cex = 1.5, col = "aquamarine4")
      
      if ("FAILED" %in% QCinfo$data$countdist$DiagnosticTest[,"Diagnostic Test"]) {
        text(5,5, "FAILED. There is a pair of samples with significantly different RNA composition", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is required.", adj = 0.5, font = 1, cex = 1)
      } else {
        text(5,5, "PASSED. The pairs of compared samples do not present significant differences in RNA composition.", 
             adj = 0.5, font = 1, cex = 1)
        text(5,3, "Normalization for correcting this bias is NOT required.", adj = 0.5, font = 1, cex = 1)
      }
      
      
      
      par(mar = c(5.1,4.1,4.1,2.1))
      if (length(samples2) < 14) {
        cd.plot(QCinfo$data$countdist, samples = samples2) 
      } else {
        cd.plot(QCinfo$data$countdist, samples = setdiff(samples2, QCinfo$data$countdist$refColumn)[1:12]) 
      }
      
      
      
      par(mar = c(0,0,0,0))
      lugares = c(1,4.5,6.5,8.5)
      plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
      text(lugares[1],10, "Confidence intervals for median of M values", adj = 0, font = 2, cex = 1.2)
      text(lugares[1], 9.4, "Sample", adj = 0, font = 1, cex = 1)
      abline(h = 9.2, lty = 2, col = "grey")
      for (j in 1:3) {
        text(lugares[j+1] , 9.4, colnames(QCinfo$data$countdist$DiagnosticTest)[j], adj = 0, font = 1, cex = 1)   
        
        for (i in 1:min(30,nrow(QCinfo$data$countdist$DiagnosticTest))) {
          
          if (j == 1) text(lugares[j], 9.2-i*0.3, rownames(QCinfo$data$countdist$DiagnosticTest)[i], 
                           adj = 0, font = 1, cex = 1)
          if (j < 3) text(lugares[j+1], 9.2-i*0.3, adj = 1, font = 1, cex = 1,
                          round(as.numeric(QCinfo$data$countdist$DiagnosticTest[i,j]),4))
          if (j == 3) text(lugares[j+1], 9.2-i*0.3, adj = 0, font = 1, cex = 1,
                           QCinfo$data$countdist$DiagnosticTest[i,j]) 
        }        
      }
      
      if (nrow(QCinfo$data$countdist$DiagnosticTest) > 30) {
        print("WARNING: In Diagnostic Test for RNA composition, the confidence intervals are shown for only 
              the first 30 samples.")
      }
      
      
    }
    
    
  }
  
  
  # Last page (for PCA)
  
  # Page 4
  layout(matrix(c(1,1,2,3,4,4), nrow = 3, ncol = 2, byrow = TRUE), heights = c(30,40,40))
  
  par(mar = c(0,0,0,0))
  plot(1:10, 1:10, type = "n", axes = FALSE, xlab = "", ylab = "")
  text(5,6, "Exploratory PCA", adj = 0.5, font = 2, cex = 2, col = "dodgerblue4")
  text(5,4, "Use this plot to see if samples are clustered according to the experimental design.", 
       adj = 0.5, font = 1, cex = 1)
  text(5,3, "Use ARSyNseq function to correct potential batch effects.", adj = 0.5, font = 1, cex = 1)
  
  if (is.null(factor)) factor = colnames(QCinfo$data$PCA$factors)[1]
    
  par(mar = c(5.1,4.1,4.1,2.1))
  PCA.plot(QCinfo$data$PCA, samples = 1:2, factor = factor) 
  
  par(mar = c(5.1,4.1,4.1,2.1))
  PCA.plot(QCinfo$data$PCA, samples = c(1,3), factor = factor) 

  #*****#
  
  dev.off()
  
}
  
  

Try the NOISeq package in your browser

Any scripts or data that you put into this service are public.

NOISeq documentation built on Nov. 8, 2020, 5:10 p.m.