MultiBaC: Multiomic Batch effect Correction

Documented in batchEstPlot explained_varPlot inner_relPlot plot.mbac plot_pca

#' @import grDevices

#' Plot Method for mbac
#'
#' plot function for \code{mbac} class.
#'
#' @aliases plot.mbac plot,mbac-method
#' @param x mbac object generated by *createMbac*, *ARSyNbac* or *MultiBaC*.
#' @param y Currently not used.
#' @param typeP type of plot showed. See details.
#' @param col.by.batch Argument for PCA plots. TRUE or FALSE. If TRUE (default), samples are colored according to the batch factor. If FALSE, samples are colored according to the experimental conditions.
#' @param col.per.group Argument for PCA plots. Color for each group (given by batches or experimental conditions). If NULL (default), the colors are taken from a predefined pallete.
#' @param comp2plot Argument for PCA or InnerRel plot. It indicates which components are to be plotted. The default is c(1,2), which means that, in PCA plots, component 1 is plotted in "x" axis and component 2 in "y" axis, and for InnerRel plots, the inner relation plots of components 1 and 2 are to be shown. If more components are indicated, the function will return as many plots as needed to show all the components.
#' @param legend.text A vector of text used to construct a legend for the plot. Argument for PCA plot. If NULL (default) batch or conditions names included in the mbac object are used.
#' @param args.legend list of additional arguments to pass to legend(); names of the list are used as argument names. Only used if legend.text is supplied.
#' @param ... Other graphical arguments.
#'
#' @export
#' @rdname plot
#'
#' @details
#' typeP options are: "def" (default option, "Q2 plot" and "Explained variance plot" in case of MultiBaC and "Explained variance plot" in case of ARSyNbac outputs), "inner" (inner correlation plots for each PLS model acroos the components for MultiBaC output), "pca.org" (PCA plot of original data for MultiBaC or ARSyNbac outputs), "pca.cor" (PCA plot of corrected data for MultiBaC or ARSyNbac outputs), "pca.both" (PCA plots for both original and corrected data for MultiBaC or ARSyNbac outputs), and "batch" ("Batch effect estimation" plot for all the output). Remember that PCA plots can only be generated when all the omics share the same variable space (e.g. gene identifiers are provided as names of variables for all data matrices).
#' While the *plot* function can generate all the plot types described above, each plot can also be independently generated by its corresponding function: *Q2_plot (mbac)*, *explained_varPlot (mbac)*, *plot_pca (mbac, typeP = c("pca.org", "pca.cor", "pca.both"), col.by.batch, col.per.group, comp2plot, legend.text, args.legend)*, *batchEstPlot (mbac, commonOmic)*, or *inner_relPlot (mbac, comp2plot = c(1,2))*.
#'
#' @return A plot is displayed.
#'
#' @examples
#' data('multiyeast')
#'
#' my_mbac <- createMbac (inputOmics = list(A.rna, A.gro, B.rna, B.ribo, C.rna, C.par),
#'                        batchFactor = c("A", "A", "B", "B", "C", "C"),
#'                        experimentalDesign = list("A" =  c("Glu+", "Glu+",
#'                        "Glu+", "Glu-", "Glu-", "Glu-"),
#'                        "B" = c("Glu+", "Glu+", "Glu-", "Glu-"),
#'                        "C" = c("Glu+", "Glu+", "Glu-", "Glu-")),
#'                        omicNames = c("RNA", "GRO", "RNA", "RIBO", "RNA", "PAR"))
#'
#' my_final_mbac <- MultiBaC (my_mbac,
#'                            test.comp = NULL, scale = FALSE,
#'                            center = TRUE, crossval = NULL,
#'                            Variability = 0.90,
#'                            Interaction = TRUE ,
#'                            showplot = FALSE,
#'                            showinfo = FALSE)
#' plot(my_final_mbac)
#
plot.mbac <- function(x, y = NULL, typeP = "def",
                   col.by.batch = TRUE, col.per.group = NULL,
                   comp2plot = c(1,2),
                   legend.text = NULL,
                   args.legend = NULL,
                   ...) {
            if (typeP == "def") {
              # Check
              if (sum(is.element("PLSmodels", names(x))) < 1) {
                explained_varPlot(x)
              } else if (sum(is.element("ARSyNmodels", names(x))) < 1) {
                Q2_plot(x, ...)
              } else if (sum(is.element(c("PLSmodels", "ARSyNmodels"), names(x))) < 1) {
                stop("Default plot is not possible with the current mbac object. Please select another plot type with typeP argument")
              } else {
                initpar <- par()
                on.exit(par(mfrow = initpar$mfrow))
                par(mfrow = c(1,2))
                Q2_plot(x, ...)
                par(xpd = FALSE)
                explained_varPlot(x)
              }

            } else if (typeP == "inner") {
              # Check
              if (sum(is.element("InnerRelation", names(x))) < 1) {
                stop("Default plot is not possible with the current mbac object. Please select another plot type with typeP argument")
              }
              inner_relPlot(x, comp2plot = comp2plot, ...)
            } else if (typeP == "pca.cor") {
              plot_pca(x, col.by.batch = col.by.batch, col.per.group = col.per.group,
                       comp2plot = comp2plot, typeP = typeP, legend.text = legend.text,
                       args.legend = args.legend,...)
            } else if (typeP == "pca.org") {
              plot_pca(x, col.by.batch = col.by.batch, col.per.group = col.per.group,
                       comp2plot = comp2plot, typeP = typeP, legend.text = legend.text,
                       args.legend = args.legend, ...)
            } else if (typeP == "pca.both") {
              plot_pca(x, col.by.batch = col.by.batch, col.per.group = col.per.group,
                       comp2plot = comp2plot, typeP = typeP, legend.text = legend.text,
                       args.legend = args.legend, ...)
            } else if (typeP == "batch") {
              batchEstPlot(x, ...)
            } else if (typeP == "var") {
              # Check
              if (sum(is.element("ARSyNmodels", names(x))) < 1) {
                stop("Default plot is not possible with the current mbac object. Please select another plot type with typeP argument")
              }
              explained_varPlot(x, ...)
            } else if (typeP == "q2") {
              # Check
              if (sum(is.element("PLSmodels", names(x))) < 1) {
                stop("Default plot is not possible with the current mbac object. Please select another plot type with typeP argument")
              }
              Q2_plot(x, ...)
            } else {
              stop("Error: value of typeP argument not valid.")
            }
}


#' Q2_plot
#'
#' @param mbac Object of class mbac generated by *MultiBaC* or *genModelList*.
#' @param ... Other graphical parameters
#'
#' @export
#'
#' @return Q2 plot of PLS models is displayed.
#'
#' @examples
#' data('multiyeast')
#'
#' my_mbac <- createMbac (inputOmics = list(A.rna, A.gro, B.rna, B.ribo, C.rna, C.par),
#'                        batchFactor = c("A", "A", "B", "B", "C", "C"),
#'                        experimentalDesign = list("A" =  c("Glu+", "Glu+",
#'                        "Glu+", "Glu-", "Glu-", "Glu-"),
#'                        "B" = c("Glu+", "Glu+", "Glu-", "Glu-"),
#'                        "C" = c("Glu+", "Glu+", "Glu-", "Glu-")),
#'                        omicNames = c("RNA", "GRO", "RNA", "RIBO", "RNA", "PAR"))
#'
#' my_final_mbac <- MultiBaC (my_mbac,
#'                            test.comp = NULL, scale = FALSE,
#'                            center = TRUE, crossval = NULL,
#'                            Variability = 0.90,
#'                            Interaction = TRUE ,
#'                            showplot = FALSE,
#'                            showinfo = FALSE)
#'
#' Q2_plot (my_final_mbac)
#'
Q2_plot <- function (mbac, ...){
  modelList <- mbac$PLSmodels
  # Q2 plot -------------------------------------------------------------------
  pallete <- grDevices::colors()[c(11,17,51,56,29,512,97,653,136,24)]

  # Get Q2 values -------------------------------------------------------------
  q2values <- lapply(modelList, function(l){
    lapply(l, function(x){x@modelDF$`Q2(cum)`})
  })
  test.comp <- max(unlist(lapply(q2values, function(l) {
    lapply(l, length)
  })))
  # Make plot
  plot(1:3,1:3, type = "n", pch = 19,
       ylim = c(min(unlist(q2values)),1), xaxt = "n",
       xlim = c(1,test.comp+1), xlab="Number of Components", ylab = "Squared Q value",
       main = "Q2 plot", bty = "L",
       cex.lab = 1.25, cex.axis = 1.25, font.lab = 2, cex.main=1.5)
  names <- c()
  abline(h = 0.7, col = "gray", lty = 5)
  for ( i in seq_along(q2values)) {
    for ( m in seq_along(q2values[[i]])) {
      names <- c(names, paste0(names(q2values)[i], ": ", names(q2values[[i]])[m]))
      lines(1:length(q2values[[i]][[m]]), q2values[[i]][[m]],
            type = "b", pch = 19, col = pallete[i])
      cn <- which(q2values[[i]][[m]] == max(q2values[[i]][[m]]))
      points(cn, q2values[[i]][[m]][cn], pch = 17, cex = 1.8, col = pallete[i])
    }
  }
  axis(1, seq_len(test.comp), c(seq_len(test.comp)), cex.axis = 1.25)

  cood <- findGrid(test.comp, unlist(q2values, recursive = FALSE))
  legend(cood$x, cood$y,
         bty = "o", title = "Batches", bg = rgb(255, 255, 255, alpha = 0.6*255,
                                                maxColorValue = 255),
         box.lwd = 0,
         legend = names, xjust = 0.5, yjust = 0.5,
         col = c(pallete),
         cex = 1.5, lty = c(1,1), pch = c(19,19))
}


#' explained_varPlot
#'
#' @param mbac Object of class mbac generated by *ARSyNbac*, *MultiBaC* or *batchCorrection*.
#' @param ... Other graphical parameters.
#'
#' @return Explained variance plot for ARSyN models is displayed.
#'
#' @export
#'
#' @examples
#' data('multiyeast')
#'
#' my_mbac <- createMbac (inputOmics = list(A.rna, A.gro, B.rna, B.ribo, C.rna, C.par),
#'                        batchFactor = c("A", "A", "B", "B", "C", "C"),
#'                        experimentalDesign = list("A" =  c("Glu+", "Glu+",
#'                        "Glu+", "Glu-", "Glu-", "Glu-"),
#'                        "B" = c("Glu+", "Glu+", "Glu-", "Glu-"),
#'                        "C" = c("Glu+", "Glu+", "Glu-", "Glu-")),
#'                        omicNames = c("RNA", "GRO", "RNA", "RIBO", "RNA", "PAR"))
#'
#' my_final_mbac <- MultiBaC (my_mbac,
#'                            test.comp = NULL, scale = FALSE,
#'                            center = TRUE, crossval = NULL,
#'                            Variability = 0.90,
#'                            Interaction = TRUE ,
#'                            showplot = FALSE,
#'                            showinfo = FALSE)
#'
#' explained_varPlot (my_final_mbac)
#'
explained_varPlot <- function(mbac, ...){
  ascamodels <- mbac$ARSyNmodels

  if (!is.null(ascamodels[[1]]$Model.bab)) {
    ylab_title <- "Explained batch-related variability (%)"
  } else if (!is.null(ascamodels[[1]]$Model.b)) {
    ylab_title <- "Explained batch-related variability (%)"
  } else {
    ylab_title <- "Explained residuals variability (%)"
  }

  # Get Variability -------------------------------------------------------------
  Variability <- lapply(ascamodels, function(l) {
    if (!is.null(l$Model.bab)) {
      l$Model.bab$Variability
    } else if (!is.null(l$Model.b)) {
      l$Model.b$Variability
    } else {
      l$Model.res$beta
    }

  })

  # Get components -------------------------------------------------------------
  components <- lapply(ascamodels, function(l) {
    if (!is.null(l$Model.bab)) {
      dim(l$Model.bab$scores)[2]
    } else if (!is.null(l$Model.b)) {
      dim(l$Model.b$scores)[2]
    } else {
      dim(l$Model.res$scores)[2]
    }

  })

  # Get exp.var ---------------------------------------------------------------
  ascamodels <- lapply(ascamodels, function(l) {
    if (!is.null(l$Model.bab)) {
      lim <- dim(l$Model.bab$var.exp)[1]
      c(0,l$Model.bab$var.exp[1:min(lim,10),2])
    } else if (!is.null(l$Model.b)) {
      lim <- dim(l$Model.b$var.exp)[1]
      c(0,l$Model.b$var.exp[1:min(lim,10),2])
    } else {
      lim <- dim(l$Model.res$var.exp)[1]
      c(0,l$Model.res$var.exp[1:min(lim,10),2])
    }

  })
  test.comp <- max(unlist(lapply(ascamodels, length)))

  # Explained batch-related variability plot ----------------------------------

  plot(c(0,0), type = "n", pch = 19,
       ylim = c(0,100), xaxt = "n",
       xlim = c(0,test.comp), xlab="Number of Components",
       ylab = ylab_title,
       main = "ARSyN n. of components", bty = "L",
       cex.lab = 1.25, cex.axis = 1.25, font.lab = 2, cex.main=1.5)
  pallete <- grDevices::colors()[c(11,17,51,56,29,512,97,653,136,24)]

  for ( i in seq_along(ascamodels)) {
    lines(0:(length(ascamodels[[i]])-1),
          c(ascamodels[[i]]*100),
          type = "b", pch = 19, col = pallete[i])
    points(components[[i]], 100*ascamodels[[i]][(components[[i]]+1)],
           pch = 17,
           cex = 1.8, col = pallete[i])
    abline(h = Variability[[i]]*100, col = "gray", lty = 5)
  }

  axis(1, 0:(length(ascamodels[[1]])-1), 0:(length(ascamodels[[1]])-1), cex.axis = 1.25)
  cood <- findGrid(test.comp, lapply(ascamodels, function(x) x*100))
  legend(cood$x, cood$y,
         bty = "o", title = "Omics", bg = rgb(255, 255, 255, alpha = 0.6*255,
                                             maxColorValue = 255),
         box.lwd = 0,
         legend = c(names(ascamodels)),
         xjust = 0.5, yjust = 1,
         col = c(pallete[1], pallete[2:4]),
         cex = 1.5, lty = rep(1,length(ascamodels)), pch = rep(19,length(ascamodels)))
}

#' inner_relPLot
#'
#' @param mbac Object of class mbac generated by *MultiBaC* or *genModelList*.
#' @param comp2plot Indicates which components are plotted. Default is "c(1,2)", which means that just the inner relation plot of components 1 and 2 are shown. If more components are indicated, the function will return as many plots as needed to show all the components.
#' @param ... Other graphical parameters
#'
#' @return Inner relation plot for PLS models is displayed.
#'
#' @export
#'
#' @examples
#' data('multiyeast')
#'
#' my_mbac <- createMbac (inputOmics = list(A.rna, A.gro, B.rna, B.ribo, C.rna, C.par),
#'                        batchFactor = c("A", "A", "B", "B", "C", "C"),
#'                        experimentalDesign = list("A" =  c("Glu+", "Glu+",
#'                        "Glu+", "Glu-", "Glu-", "Glu-"),
#'                        "B" = c("Glu+", "Glu+", "Glu-", "Glu-"),
#'                        "C" = c("Glu+", "Glu+", "Glu-", "Glu-")),
#'                        omicNames = c("RNA", "GRO", "RNA", "RIBO", "RNA", "PAR"))
#'
#' my_final_mbac <- MultiBaC (my_mbac,
#'                            test.comp = NULL, scale = FALSE,
#'                            center = TRUE, crossval = NULL,
#'                            Variability = 0.90,
#'                            Interaction = TRUE ,
#'                            showplot = FALSE,
#'                            showinfo = FALSE)
#'
#' inner_relPlot (my_final_mbac)
#'
inner_relPlot <- function(mbac, comp2plot = c(1,2), ...) {

  modelList <- mbac$PLSmodels
  for ( b in seq_along(modelList)) {

    for ( m in seq_along(modelList[[b]])) {
      aux <- readline(prompt="Hit <Return> to see next plot: ")
      message(paste0("Inner correlation of scores. Batch: ", names(modelList)[b],
                     "; Model for omic: ", names(modelList[[b]])[m]))

      max.comp <- dim(modelList[[b]][[m]]@scoreMN)[2]
      comp2plot <- comp2plot[which(comp2plot <= max.comp)]
      ncomp <- length(comp2plot)


      # Create layout
      initpar <- par()
      on.exit(par(initpar))
      grid <- floor(ncomp/2) + ncomp%%2
      par(mfrow = c(grid,2), xpd = FALSE)

      for ( n in seq_along(comp2plot)) {
        par(xpd = FALSE)
        plot(modelList[[b]][[m]]@scoreMN[,comp2plot[n]],
             modelList[[b]][[m]]@uMN[,comp2plot[n]], pch = 19,
             col = "dodgerblue3", asp = 1, bty = "l",
             xlab = "X score [t]", ylab = "Y score [u]",
             main = paste0("Component number: ",comp2plot[n]), type = "n")
        abline (0,1, col = "indianred", lwd = 2)
        abline (v = 0, col = "gray", lwd = 1, lty = 5)
        abline (h = 0, col = "gray", lwd = 1, lty = 5)
        points(modelList[[b]][[m]]@scoreMN,
               modelList[[b]][[m]]@uMN, pch = 19,
               col = "dodgerblue3")
        r2 <- stats::cor(modelList[[b]][[m]]@scoreMN[,comp2plot[n]],
                         modelList[[b]][[m]]@uMN[,comp2plot[n]])
        par(xpd = TRUE)
        text(0, max(modelList[[b]][[m]]@uMN[,comp2plot[n]]+0.5), label = paste0("cor: ",round(r2, digits = 3)))
      }
    }
  }
}


#' batchEstPlot
#'
#' This function uses linear models to estimate the batch effect magnitude using the common data across batches. It compares
#' the result with theoretical distribution of diferrent levels of batch magnitude.
#'
#' @param mbac Object of class mbac generated by *createMbac*.
#' @param ... Other graphical parameters.
#'
#' @return Batch estimation plot is displayed.
#'
#' @export
#'
#' @examples
#' data('multiyeast')
#'
#' my_mbac <- createMbac (inputOmics = list(A.rna, A.gro, B.rna, B.ribo, C.rna, C.par),
#'                        batchFactor = c("A", "A", "B", "B", "C", "C"),
#'                        experimentalDesign = list("A" =  c("Glu+", "Glu+",
#'                        "Glu+", "Glu-", "Glu-", "Glu-"),
#'                        "B" = c("Glu+", "Glu+", "Glu-", "Glu-"),
#'                        "C" = c("Glu+", "Glu+", "Glu-", "Glu-")),
#'                        omicNames = c("RNA", "GRO", "RNA", "RIBO", "RNA", "PAR"))
#'
#' batchEstPlot (my_mbac)
#'
batchEstPlot <- function(mbac, ...) {

  ListOfBatches <- mbac$ListOfBatches
  initpar <- par(c("mai", "pin", "xpd"))
  mai <- c(initpar$mai[1:3], initpar$pin[2]-0.15)
  on.exit(par(mai = initpar$mai, xpd = initpar$xpd))
  par(mai = mai)

  # Input Data Structure -------------------------------------------------------
  inputList <- getData(ListOfBatches)
  commonOmic <- mbac$commonOmic

  # Extract common omic --------------------------------------------------------
  Xunfold <- NULL
  for ( lab in seq_along(inputList)) {
    Xunfold <- rbind(Xunfold, t(inputList[[lab]][[commonOmic]]))
  }

  # Create batch factor --------------------------------------------------------
  batch_factor <- c()
  for ( i in seq_along(inputList) ) {
    batch_factor <- c(batch_factor, rep(i, dim(inputList[[i]][[1]])[2]))
  }

  # Calculate Overall Mean -----------------------------------------------------

  offset<-apply(Xunfold,2,mean)
  Xoff<-Xunfold-(cbind(matrix(1,nrow=nrow(Xunfold),ncol=1))%*%rbind(offset))

  # Compute coefficients -------------------------------------------------------

  design.matrix <- stats::model.matrix(~ factor(batch_factor))
  #
  design.matrix[which(design.matrix==0)] <- -1
  #
  beta.hat <- solve(t(design.matrix)%*%design.matrix) %*% t(design.matrix) %*% Xoff



  # Theoretical distributions --------------------------------------------------

  beta.hat_v <- as.vector(t(beta.hat))

  reg_v <- c()
  for ( i in 1:(length(beta.hat_v)/dim(beta.hat)[2])) {
    reg_v <- c(reg_v, rep(i,dim(beta.hat)[2]))
  }

  toplot <- data.frame("Coef" = (reg_v[(dim(beta.hat)[2]+1):length(beta.hat_v)]),
                       "Values" = beta.hat_v[(dim(beta.hat)[2]+1):length(beta.hat_v)])
  for ( i in seq_along(ListOfBatches)[-1]) {
    toplot$Coef[which(toplot$Coef==i)] <- names(ListOfBatches)[i]
  }



  # The label
  label = paste0("* Batch ", names(ListOfBatches)[1], " as\nreference")
  Coef <- toplot$Coef
  Values <- toplot$Values

  # The  plot
  p <- ggplot(data = toplot, mapping = aes(x=Coef, y=Values)) +
    geom_violin() +
    labs(title="Batch Effect Magnitude", x = "Batch", y = "Coefficients per variable", size=2,
         col = "Theoretical Batch effect \nMagnitudes",
         caption = label, font_face = "italic") +
    annotate("text", x = Inf, y = -3, label = label, hjust = -0.2, size = 5) +
    scale_fill_brewer(palette="Blues") +
    #geom_dotplot(binaxis='y', stackdir='center', dotsize=0.5) +
    theme_classic(base_family = "") +
    theme(axis.text=element_text(size=18),
          title=element_text(size=16,face="bold"),
          plot.title = element_text(hjust = 0.5),
          legend.text = element_text(size=20),
          legend.title = element_text(size = 20, face = "plain"),
          legend.spacing = unit(.25,"cm")) +
    geom_hline(aes(yintercept=6, col = "Large"), lty = 5, show.legend = TRUE) +
    geom_hline(aes(yintercept=-6, col = "Large"), lty = 5,show.legend = TRUE) +
    geom_hline(aes(yintercept=4, col = "Medium"), lty = 5,show.legend = TRUE) +
    geom_hline(aes(yintercept=-4, col = "Medium"), lty = 5,show.legend = TRUE) +
    geom_hline(aes(yintercept=2, col = "Small"), lty = 5,show.legend = TRUE) +
    geom_hline(aes(yintercept=-2, col = "Small"), lty = 5,show.legend = TRUE)

  p
}


#' plot_pca
#'
#' @param mbac Object of class mbac generated by *createMbac*, *ARSyNbac*, *MultiBaC*, *genModelList*, or *batchCorrection*.
#' @param col.by.batch Argument for PCA plots. TRUE or FALSE. If TRUE (default) samples are gruped according to the batch factor. If FALSE samples are gruped according to the experimental desing.
#' @param col.per.group Argument for PCA plot. Indicates the color for each group defined in "groups" argument. If NULL (default) the colors are taken from a predefined pallete.
#' @param comp2plot Indicates which components are plotted. Default is "c(1,2)", which means that component 1 is plotted in "x" axis and component 2 in "y" axis. If more components are indicated, the function will return as many plots as needed to show all the components.
#' @param ... Other graphical arguments.
#' @param typeP "pca.cor", "pca.org" or "pca.both". If inputOmics contains original matrices, set "pca.org". However, if inputOmics contains the corrected matrices, set "pca.cor".
#' @param legend.text a vector of text used to construct a legend for the plot.
#' @param args.legend list of additional arguments to pass to legend(); names of the list are used as argument names. Only used if legend.text is supplied.
#'
#' @return A PCA plot is displayed.
#'
#' @export
#'
#' @examples
#' data('multiyeast')
#'
#' my_mbac <- createMbac (inputOmics = list(A.rna, A.gro, B.rna, B.ribo, C.rna, C.par),
#'                        batchFactor = c("A", "A", "B", "B", "C", "C"),
#'                        experimentalDesign = list("A" =  c("Glu+", "Glu+",
#'                        "Glu+", "Glu-", "Glu-", "Glu-"),
#'                        "B" = c("Glu+", "Glu+", "Glu-", "Glu-"),
#'                        "C" = c("Glu+", "Glu+", "Glu-", "Glu-")),
#'                        omicNames = c("RNA", "GRO", "RNA", "RIBO", "RNA", "PAR"))
#'
#' plot_pca(my_mbac, typeP = "pca.org")
#'
#' my_final_mbac <- MultiBaC (my_mbac,
#'                            test.comp = NULL, scale = FALSE,
#'                            center = TRUE, crossval = NULL,
#'                            Variability = 0.90,
#'                            Interaction = TRUE ,
#'                            showplot = FALSE,
#'                            showinfo = FALSE)
#'
#' plot_pca(my_final_mbac, typeP = "pca.cor")
#'
plot_pca <- function(mbac, col.by.batch = TRUE, col.per.group = NULL,
                     comp2plot = c(1,2), typeP = "pca.both", legend.text = NULL,
                     args.legend = NULL, ...) {

  initpar <- par(c("mfrow"))
  on.exit(par(mfrow = initpar))
  ListOfBatches <- mbac$ListOfBatches
  if ( !is.null(mbac$CorrectedData)) {
    CorrectedData <- mbac$CorrectedData
  }

  create_gmatrix <- function(ListOfBatches, col.by.batch = TRUE, comp2plot = c(1,2)) {
    # Check if pca plot is possible ----------------------------------------------
    dims <- unlist(lapply(ListOfBatches, function(l) {
      lapply(l@ExperimentList, function(m) {
        dim(m)[1]
      })
    }))

    groups <- c()
    if ((sum(dims == dims[1])-length(dims))==0) {
      gmatrix <- NULL
      for ( b in seq_along(ListOfBatches)) {
        len_g <- 0
        for ( m in seq_along(ListOfBatches[[b]]@ExperimentList)) {
          gmatrix <- cbind(gmatrix, ListOfBatches[[b]]@ExperimentList[[m]])
          len_g <- len_g + dim(ListOfBatches[[b]]@ExperimentList[[m]])[2]
        }
        if (col.by.batch) {
          groups <- c(groups, rep(names(ListOfBatches)[b], len_g))
        } else {
          groups <- c(groups, rep(data.frame(ListOfBatches[[b]]@colData$tfactor),
                                  length(seq_along(ListOfBatches[[b]]@ExperimentList))))
        }

      }
      groups <- unlist(groups)
      gmatrix <- t(gmatrix)
      max.comp <- min(dim(gmatrix))-1
      comp2plot <- comp2plot[which(comp2plot <= max.comp)]

      pc <- PCA.GENES(gmatrix, ncomp = max(comp2plot))
    } else {
      # get commonOmic
      t1 <- table(unlist(lapply(ListOfBatches, function(l) {
        names(l@ExperimentList)
      })))
      commonOmic <- names(t1[which(t1==max(t1))])
      #
      gmatrix <- NULL
      for ( b in seq_along(ListOfBatches)) {
        len_g <- 0
        gmatrix <- cbind(gmatrix, ListOfBatches[[b]]@ExperimentList[[commonOmic]])
        len_g <- len_g + dim(ListOfBatches[[b]]@ExperimentList[[commonOmic]])[2]
        if (col.by.batch) {
          groups <- c(groups, rep(names(ListOfBatches)[b], len_g))
        } else {
          groups <- c(groups, rep(data.frame(ListOfBatches[[b]]@colData)$tfactor,
                                  length(seq_along(ListOfBatches[[b]]@ExperimentList))))
        }

      }
      gmatrix <- t(gmatrix)
      max.comp <- min(dim(gmatrix))-1
      comp2plot <- comp2plot[which(comp2plot <= max.comp)]

      pc <- PCA.GENES(gmatrix, ncomp = max(comp2plot))
    }

    return(list("pc" = pc, "groups" = groups,
                "comp2plot" = comp2plot))
  }

  in_plot <- function(pc1, pc2 = NULL,
                      groups, col.per.group = NULL,
                      comp2plot, title = NULL, legend.text = NULL,
                      args.legend = NULL, ...) {

    # Set color, pch and title arguments ----------------------------------------------
    pallete <- grDevices::colors()[c(11,17,51,56,29,512,97,653,136,24)]
    if ( is.null(col.per.group)) {
      col.per.group <- pallete[1:length(unique(groups))]
      names(col.per.group) <- names(table(groups))
    } else {
      if (length(table(groups)) > length(table(col.per.group))) {
        col.per.group <- rep(col.per.group, ceiling(length(table(groups))/length(table(col.per.group))))
        names(col.per.group)[1:length(table(groups))] <- names(table(groups))
      } else {
        names(col.per.group)[1:length(table(groups))] <- names(table(groups))
      }
    }
    color <- unlist(sapply (groups, function (g) {
      col.per.group[g]
    }))
    #
    plot(pc1$scores[,comp2plot[1]], pc1$scores[,comp2plot[2]],
         xlab = paste0("PC ", comp2plot[1], ": ",
                       round(pc1$var.exp[comp2plot[1],1], digits = 4)*100, " %"),
         ylab = paste0("PC ", comp2plot[2], ": ",
                       round(pc1$var.exp[comp2plot[2],1], digits = 4)*100, " %"),
         # pch = omic; fill = condition
         col = color,
         asp = 1,
         # other arguments
         main = title, xlim = c(min(pc1$scores[,comp2plot[1]]), (max(pc1$scores[,comp2plot[1]]+5)+
                                                                   max(pc1$scores[,comp2plot[1]]+5)*0.3)),
         ...)
    abline(v = 0, lty = 5, col = "gray", xpd = FALSE)
    abline(h = 0, lty = 5, col = "gray", xpd = FALSE)
    if (!is.null(legend.text)) {
      if (is.null(args.legend)) {
        args.legend <- list(x = "topright", fill = col.per.group,
                            legend = legend.text)
      } else {
        args.legend[["legend"]] <- legend.text
      }
      do.call(legend, args.legend)
    }  else {
      legend("topright", legend = names(col.per.group), fill = col.per.group)
    }

    if (!is.null(pc2)) {

      # Set color, pch and title arguments ----------------------------------------------
      pallete <- grDevices::colors()[c(11,17,51,56,29,512,97,653,136,24)]
      if ( is.null(col.per.group)) {
        col.per.group <- pallete[1:length(unique(groups))]
        names(col.per.group) <- names(table(groups))
      } else {
        if (length(table(groups)) > length(table(col.per.group))) {
          col.per.group <- rep(col.per.group, ceiling(length(table(groups))/length(table(col.per.group))))
          names(col.per.group)[1:length(table(groups))] <- names(table(groups))
        } else {
          names(col.per.group)[1:length(table(groups))] <- names(table(groups))
        }
      }
      color <- unlist(sapply (groups, function (g) {
        col.per.group[g]
      }))
      plot(pc2$scores[,comp2plot[1]], pc2$scores[,comp2plot[2]],
           xlab = paste0("PC ", comp2plot[1], ": ",
                         round(pc2$var.exp[comp2plot[1],1], digits = 4)*100, " %"),
           ylab = paste0("PC ", comp2plot[2], ": ",
                         round(pc2$var.exp[comp2plot[2],1], digits = 4)*100, " %"),
           # pch = omic; fill = condition
           col = color,
           asp = 1,
           # other arguments
           main = "PCA of corrected data", xlim = c(min(pc2$scores[,comp2plot[1]]), (max(pc1$scores[,comp2plot[1]]+5)+
                                                                                       max(pc1$scores[,comp2plot[1]]+5)*0.3)),
           ...)
      abline(v = 0, lty = 5, col = "gray", xpd = FALSE)
      abline(h = 0, lty = 5, col = "gray", xpd = FALSE)
      if (!is.null(legend.text)) {
        if (is.null(args.legend)) {
          args.legend <- list(x = "topright", fill = col.per.group,
                              legend = legend.text)
        } else {
          args.legend[["legend"]] <- legend.text
        }
        do.call(legend, args.legend)
      } else {
        legend("topright", legend = names(col.per.group), fill = col.per.group)
      }
    }
  }

  if ( typeP == "pca.org") {

    aux <- create_gmatrix (ListOfBatches = ListOfBatches,
                           col.by.batch = col.by.batch,
                           comp2plot = comp2plot)
    pc <- aux$pc
    groups <- aux$groups
    comp2plot <- comp2plot
    ncomp <- length(comp2plot) - 1
    if (ncomp > 1) {
      grid <- floor(ncomp/2) + ncomp%%2
      par(mfrow = c(grid,2), xpd = FALSE)
    }


    for ( nc in comp2plot[-1]) {
      in_plot (pc1 = pc, pc2 = NULL,
               groups = groups, col.per.group = col.per.group,
               comp2plot = c(comp2plot[1], comp2plot[nc]), title =
                 "PCA of original data", legend.text = legend.text,
               args.legend = args.legend, ...)
    }
  } else if (typeP == "pca.cor") {
    aux <- create_gmatrix (ListOfBatches = CorrectedData,
                           col.by.batch = col.by.batch,
                           comp2plot = comp2plot)
    pc <- aux$pc
    groups <- aux$groups
    comp2plot <- comp2plot
    ncomp <- length(comp2plot) - 1
    if (ncomp > 1) {
      grid <- floor(ncomp/2) + ncomp%%2
      par(mfrow = c(grid,2), xpd = FALSE)
    }

    for ( nc in comp2plot[-1]) {
      in_plot (pc1 = pc, pc2 = NULL,
               groups = groups, col.per.group = col.per.group,
               comp2plot = c(comp2plot[1], comp2plot[nc]), title =
                 "PCA of corrected data", legend.text = legend.text,
               args.legend = args.legend, ...)
    }
  } else if (typeP == "pca.both") {

    auxO <- create_gmatrix (ListOfBatches = ListOfBatches,
                            col.by.batch = col.by.batch,
                            comp2plot = comp2plot)
    pc <- auxO$pc
    groups <- auxO$groups
    comp2plot <- comp2plot

    auxC <- create_gmatrix (ListOfBatches = CorrectedData,
                            col.by.batch = col.by.batch,
                            comp2plot = comp2plot)
    pc2 <- auxC$pc

    ncomp <- 2 * (length(comp2plot) - 1)
    if (ncomp > 1) {
      grid <- floor(ncomp/2) + ncomp%%2
      par(mfrow = c(grid,2), xpd = FALSE)
    }
    for ( nc in comp2plot[-1]) {
      in_plot (pc1 = pc, pc2 = pc2,
               groups = groups, col.per.group = col.per.group,
               comp2plot = c(comp2plot[1], comp2plot[nc]), title =
                 "PCA of original data", legend.text = legend.text,
               args.legend = args.legend, ...)
    }
  }
}