R/lipidome_comparison_pca.R

Defines functions plot_loadings plot_contrib_to_pc biplot_ggplot2 scree_base biplot_factoextra plot_pc_variance scree_factoextra

Documented in biplot_factoextra biplot_ggplot2 plot_contrib_to_pc plot_loadings plot_pc_variance scree_base scree_factoextra

### Principal component analysis

#' Scree plot with factoextra
#'
#' @description `scree_factoextra` takes a prcomp element and diplays the percentage of variance explained by the principal components in a barplot.
#' @details This function takes a prcom element (i.e. an element generated by the base::prcomp function), and displays the percentage of variance explained by each principal component in a barplot. Additionally there is also a dot- and line graph.
#' @param prcomp_element object of class prcomp or pca. Produced by base::prcomp or FactoMineR::PCA.
#' @param out_path string. Path to save plot to png. If out_path is empty, the plot is printed to the device.
#' @param title string. Main title of plot. Default: "Scree plot"
#' @export
#' @examples
#' {pca_iris <- prcomp(dplyr::select_if(iris, is.numeric))
#' scree_factoextra(pca_iris, title = "Scree plot iris")}
scree_factoextra <- function(prcomp_element,
                             title = "Scree plot",
                             out_path = "none"){

  scree <- factoextra::fviz_eig(prcomp_element,
                    barfill = viridis::viridis(n = 1, begin = 0.3),
                    barcolor = viridis::viridis(n = 1, begin = 0.3),
                    # linecolor = "grey40",
                    addlabels = TRUE) +
    ggplot2::labs(title = title) +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5),
          axis.text.x = ggplot2::element_text(size = 8),
          axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5),
          axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5),
          legend.text = ggplot2::element_text(size = 8),
          legend.title = ggplot2::element_text(size = 10))

  if(out_path != "none"){
    print(paste("Saving plot to ", out_path, "_screePlot.png", sep = ""))
    ggplot2::ggsave(paste(out_path, "_screePlot.png", sep = ""),
           plot = scree)
  }
  else{
    scree
  }
}


#' Plot contribution of principal component to variance
#'
#' @description `plot_pc_variance` prints a barplot with a line of the contribution of principal components to the data variance
#' @param data_frame data frame. Contains eigenvalues, contribution to data variance and cummulative contribution to data variance for a PCA model.
#' @param x column of data_frame. Column with principal component names.
#' @param y column of data_frame. Column with eigenvalues/contribution to variance/ cummulative contribution to variance.
#' @param xlab string. X-axis label. Default: "dimensions"
#' @param ylab string. X-axis label. Default: "variance [\%]"
#' @param title string. Main title. Default: "Scree plot"
#' @param fill string. Color of the barplot. Default: "#35608DFF"
#' @param hjust numeric. Position of labels relative to the points in the graph. Default: -0.2
#' @export
#' @examples
#' {iris_pca <- FactoMineR::PCA(dplyr::select_if(iris, is.numeric))
#' iris_eigen <- as.data.frame(factoextra::get_eigenvalue(iris_pca))
#' plot_pc_variance(iris_eigen, x = rownames(iris_eigen), y = iris_eigen$cumulative.variance.percent)}
plot_pc_variance <- function(data_frame,
                             x,
                             y,
                             xlab = "dimensions",
                             ylab = "variance [%]",
                             title = "Scree plot",
                             fill = "#35608DFF",
                             hjust = -0.2){

  variance_barchart <- ggplot2::ggplot(data=data_frame, ggplot2::aes(x=x, y=y, group = 1)) +
    ggplot2::geom_bar(stat = "identity", fill = fill) +
    ggplot2::geom_line() +
    ggplot2::geom_point() +
    ggplot2::labs(title = title, y = ylab, x = xlab) +
    ggplot2::scale_x_discrete(limits = x) +
    ggplot2::geom_text(ggplot2::aes(label = round(y, 1)), vjust = -0.2, hjust = hjust, color = "grey40")

  variance_barchart
}





#' Biplot with factorextra
#'
#' @description `biplot_factoextra` prints a biplot from a prcomp element.
#' @details This function prints a biplot from a prcomp element.
#' The points can be colored by group. Loadings are optionally displayed.
#' @param prcomp_element object of class prcomp. The base::prcomp function performs a principal component analysis.
#' This is the object it returns.
#' @param groups vector. Groups to color by. Default = "none"
#' @param ellipse bool. Draw confidence ellipse arount the clusters of groups. Default = FALSE.
#' @param loadings string / vector of strings. Draw loadings arrows and labels. c("arrow", "text"), "text", "none" (default).
#' @param out_path string. Path to save plot to png.
#' If out_path is empty, the plot is printed to the device.
#' @export
#' @examples
#' {pca_iris <- prcomp(dplyr::select_if(iris, is.numeric))
#' iris_groups <- iris$Species
#' biplot_factoextra(pca_iris)
#' biplot_factoextra(pca_iris,
#'                   groups = iris_groups,
#'                   ellipse = TRUE,
#'                   loadings = c("arrow", "text"))}
biplot_factoextra <- function(prcomp_element,
                              groups = "none",
                              ellipse = FALSE,
                              loadings = "none",
                              out_path = "none"){

  biplot <- factoextra::fviz_pca_biplot(prcomp_element,
                            habillage = groups, # a vector of groups by whicht to color
                            label = "var",
                            labelsize = 2,
                            repel = TRUE, # labels do not overlap
                            col.var = "grey40",
                            ## ellipses
                            addEllipses = ellipse,
                            ellipse.type = "norm",
                            ## legend
                            legend.title = "Groups",
                            geom.var = loadings)   +
    viridis::scale_color_viridis(discrete = TRUE) +
    viridis::scale_fill_viridis(discrete = TRUE) +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5),
              axis.text.x = ggplot2::element_text(size = 8),
              axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5),
              axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5),
              legend.text = ggplot2::element_text(size = 8),
              legend.title = ggplot2::element_text(size = 10))

  if(out_path != "none"){
    print(paste("Saving to ", out_path, "_biplot.png", sep = ""))
    ggplot2::ggsave(paste(out_path, "_biplot.png", sep = ""),
           plot = biplot)
  }
  else{
    biplot
  }
}


#' Scree plot with Rbase
#'
#' @description `scree_base` takes a prcomp element and diplays the percentage and the cummulative percentage of variance of variance explained by the principal components in a barplot.
#' @details This function takes a prcomp element (i.e. an element generated by the base::prcomp function), and displays the percentage of variance and the cummulative percentage of variance explained by each principal component in a dot- and line graph.
#' @param input_df data frame or matrix.
#' @param scale bool. Apply scaling when erforming PCA. Default = FALSE.
#' @param out_path string. Path to save plot to png. If out_path is empty, the plot is printed to the device.
#' @param title vector of strings. Main titles for both plots. Default: c("Scree plot", "Cummulative scree plot")
#' @export
#' @examples
#' scree_base(mtcars)
scree_base <- function(input_df,
                       title = c("Scree plot", "Cummulative scree plot"),
                       scale = FALSE,
                       out_path = "none"){

  out_name <- paste(out_path, "_screePlot", ".png", sep = "")

  prcomp_element <- stats::prcomp(input_df)

  # variance explained by each pc
  var_pca <- prcomp_element$sdev ^ 2
  prop_of_variance <- round(var_pca / sum(var_pca), 5) * 100
  cummulative_prop_of_variance <- cumsum(prop_of_variance)
  proportion_of_variance_table <- data.frame(Proportion_of_variance = prop_of_variance,
                                             Cummulative_proportion_of_variance = cummulative_prop_of_variance)

  func <- function(){
    graphics::par(mfrow=c(1,2), mar = c(2, 2, 2, 2))
    graphics::plot(prop_of_variance, # scree plot to decide which PCs are used
         main = "", xlab ="", ylab ="",
         ylim=c(0 ,100),
         type = "b")
    graphics::title( main = title[1], cex.main = 0.9, font.main = 1,
           cex.lab = 1,
           xlab =" Principal Component ",
           ylab =" Proportion of Variance Explained [%]")

    graphics::plot(cummulative_prop_of_variance, # scree plot to decide which PCs are used
         main = "", xlab ="", ylab ="",
         ylim=c(0 ,100),
         type="b")
    graphics::title( main = title[2], cex.main = 0.9, font.main = 1,
           cex.lab = 0.8,
           xlab ="Principal Component",
           ylab ="Cumulative Proportion of Variance Explained [%]")
  }

  if(out_path != "none"){
    print(paste("Saving to ", out_name))
    grDevices::png(filename = out_name)
    func()
    grDevices::dev.off()
  }
  else{
    func()
  }
}


#' Biplot with ggplot2
#'
#' @description `biplot_ggplot2` prints a biplot from a prcomp element.
#' @details This function prints a biplot from a prcomp element. The points can be colored by group. Loadings are optionally displayed.
#' @param input_df a data frame with at least one factor column.
#' @param xdim integer. Default: 1
#' @param ydim integer. Default: 2
#' @param scale logical. Should data be scaled to unit variance for PCA? Default = FALSE
#' @param title string. Main title of the plot. Default: "PCA - Biplot"
#' @param groups vector. Groups to color by. Default = "none"
#' @param ellipse logical. Draw confidence ellipse arount the clusters of groups. Default = FALSE.
#' @param loadings logical. Draw loadings arrows and labels. Default = FALSE
#' @param out_path string. Path to save plot to png. If out_path is empty, the plot is printed to the device.
#' @export
#' @examples
#' \dontrun{
#' biplot_ggplot2(iris, groups = "Species")
#' biplot_ggplot2(iris, groups = "Species", xdim = 1, ydim = 3  )
#' biplot_ggplot2(iris, "Species", ellipse = TRUE, loadings = TRUE)}
biplot_ggplot2 <- function(input_df,
                           xdim = 1,
                           ydim = 2,
                           groups = "none",
                           scale = FALSE,
                           loadings = FALSE,
                           ellipse = FALSE,
                           title = "PCA - Biplot",
                           out_path = "none"){

  if(groups == "none"){
    group_by <- NULL
  }
  else {
    group_by <- groups
  }

  prcomp_element <- stats::prcomp(dplyr::select_if(input_df, is.numeric), scale. = scale)
  proportion <- summary(prcomp_element)$importance
  x_prop <- round(proportion[2, xdim] * 100, 1)
  y_prop <- round(proportion[2, ydim] * 100, 1)

  biplot <- ggplot2::autoplot(prcomp_element, data = input_df,
                     x = xdim,
                     y = ydim,
                     colour = group_by,
                     shape = group_by,
                     loadings = loadings,
                     loadings.colour =  "black",
                     loadings.label = loadings,
                     loadings.label.size = 2,
                     loadings.label.colour = "black",
                     frame = ellipse,
                     frame.type = "norm") +
    ggplot2::labs(title = title,
         x = paste("PC", xdim, " (", x_prop, "%)", sep = ""),
         y = paste("PC", ydim, " (", y_prop, "%)", sep = "")) +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5),
            axis.text.x = ggplot2::element_text(size = 8),
            axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5),
            axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5),
            legend.text = ggplot2::element_text(size = 8),
            legend.title = ggplot2::element_text(size = 10))

  if(groups != "none"){
    group_color <- as.vector(viridis::viridis(n = length(levels(input_df[[group_by]])), alpha = 1))
    biplot <- biplot +
      ggplot2::scale_fill_manual(values = group_color) +
      ggplot2::scale_color_manual(values = group_color)
  }

  if(out_path != "none"){
    print(paste("Saving plot to ", out_path, "_biplot.png", sep = ""))
    ggplot2::ggsave(paste(out_path, "_biplot.png", sep = ""),
           plot = biplot)
  }  else{
    biplot
  }
}

#' Plot contribution to pca
#'
#' `plot_contrib_to_pc` plots the contributions of each variable to each principal component in a grid.
#' @param pca_element Object produced by FactoMineR::PCA.
#' @param title string. Main title of the plot. Default = "Contribution of variables to the principal components"
#' @param out_path string. Path to save plot as png. If "none" (default), plot is printed to device.
#' @export
#' @example
#' cars_pca <- factoMineR::PCAdplyr::select_if(mtcars, is.numeric), graph = FALSE)
#' plot_contrib_to_pc(cars_pca)
plot_contrib_to_pc <- function(pca_element,
                               title = "Contribution of variables to the principal components",
                               out_path = "none"){

  out_name = paste(out_path, "pca_contribution")

  pca_variables <- factoextra::get_pca_var(pca_element)
  pca_contrib <- as.data.frame(pca_variables$contrib)
  pca_contrib_ordered <- pca_contrib[order(pca_contrib$Dim.1, decreasing = TRUE),]

  func <- function(){

    graphics::par(mar = c(1, 1, 10, 1), mfrow = c(1, 1), family = "AvantGarde", col.main = "grey40", cex.main = 0.8, col.axis = "grey40")
    mycolors <- as.vector(viridis::viridis(n = 10, direction = -1))
    corrplot::corrplot(t(pca_contrib_ordered),
             method = "circle",
             is.corr = FALSE,
             pch.col = "green",
             tl.srt = 45,
             tl.cex = 0.5,
             tl.col = "grey40",
             col= grDevices::colorRampPalette(mycolors)(10),
             cl.cex = 0.5,
             cl.align.text = "l"
    )
    graphics::title(main = title)
  }

  if(out_path != "none"){
    print(paste("Saving to ", out_name))
    grDevices::png(filename = out_name, width = 15*96, height = 5*96, units = "px")
    func()
    grDevices::dev.off()
  }
  else{
    func()
  }
  graphics::par(mar = c(1, 1, 1, 1))
}

#' Loadings plot
#'
#' `plot_loadings` plots the loadings between the first two principal components.
#' @param pca_object Object produced by FactoMineR::PCA.
#' @param title string. Main title of the plot. Default = "Loadings plot"
#' @param colour logical. TRUE ... color scale by PC1. FALSE (default) ... all black.
#' @param top_loadings integer. Number of loadings to label (the top n loadings of both PCs are labelled).
#' @param xlab string. Title of x-axis. Default: "Dim.1"
#' @param ylab string. Title of y-axis. Default: "Dim.2"
#' @export
#' @example
#' cars_pca <- FactoMineR::PCA(dplyr::select_if(mtcars, is.numeric), graph = FALSE)
#' plot_loadings(cars_pca)
plot_loadings <- function(pca_object,
                          colour = FALSE,
                          top_loadings = 10,
                          title = "Loadings plot",
                          xlab = "Dim.1",
                          ylab = "Dim.2"){
  x <- pca_object$var
  loadings <- as.data.frame(x$coord)
  loadings <- data.frame(loadings, x$contrib)

  loadings$mylabel <- rep("", nrow(loadings))
  loadings <- loadings[order(loadings$Dim.1.1, decreasing = TRUE),]
  loadings$mylabel[1:10] <- rownames(loadings)[1:10]
  loadings <- loadings[order(loadings$Dim.2.1, decreasing = TRUE),]
  loadings$mylabel[1:top_loadings] <- rownames(loadings)[1:top_loadings]

  if(top_loadings == 0){
    loadings$mylabel <- rep("", nrow(loadings))
  }

  pc1_max <- max(loadings$Dim.1)
  pc2_max <- max(loadings$Dim.2)

  if(colour == TRUE){
  loadings_plot <- ggplot2::ggplot(loadings, ggplot2::aes(x=Dim.1,
                                        y=Dim.2,
                                        colour = Dim.1.1))  }
  else{ loadings_plot <- ggplot2::ggplot(loadings, ggplot2::aes(x=Dim.1,
                                              y=Dim.2))  }

  loadings_plot <- loadings_plot +
    ggplot2::geom_point(size=1) +
    ggplot2::geom_hline(yintercept = 0,
               linetype = "dashed",
               colour = "grey60") +
    ggplot2::geom_vline(xintercept = 0,
               linetype = "dashed",
               colour = "grey60") +
    ggrepel::geom_text_repel(ggplot2::aes(x = Dim.1,
                        y = Dim.2,
                        label = mylabel),
                    colour = "black",
                    size = 3) +
    ggplot2::labs(title = title, x = xlab, y = ylab) +
    ggplot2::scale_color_viridis_c(direction = -1, end = 0.9) +
    ggplot2::xlim(-pc1_max, pc1_max) +
    ggplot2::ylim(-pc2_max, pc2_max) +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5, family="AvantGarde"),
          plot.subtitle = ggplot2::element_text(size = 8, hjust = 0.5, family = "AvantGarde", colour = "grey40"),
          axis.text.x = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
          axis.text.y = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
          axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
          axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
          legend.text = ggplot2::element_blank(),
          legend.title = ggplot2::element_blank(),
          legend.position = "none")

  loadings_plot
}
lisaschneider0509/lipidomeComparisonR documentation built on Aug. 12, 2020, 12:52 a.m.