R/lipidome_comparison_visualization.R

Defines functions plot_ratio_barcharts simple_barchart spider_chart parallel_plot calculate_ratio_matrix matrix_heatmap correlation_plot shapiro_by_factor boxplot_by_factor histogram_by_factor qqplot_by_factor

Documented in boxplot_by_factor calculate_ratio_matrix correlation_plot histogram_by_factor matrix_heatmap parallel_plot plot_ratio_barcharts qqplot_by_factor shapiro_by_factor simple_barchart spider_chart

#' Print one qqplot for each variable and group
#'
#' @description `qqplot_by_factor` takes a data frame and prints a qq-plot for each group and varible
#' @details Take a data frame and prints a qq-plot for each group and varible.
#' @param input_df a data frame with at least one factor column.
#' @param factor a string with the column name to group by
#' @param out_path optional string. If out path is given, the generated plots are saved to a pdf document with the given name.
#' @export
#' @examples
#' qqplot_by_factor(iris, "Species")
#' \dontrun{
#' dir.create(paste(getwd(), "/examples", sep = ""), showWarnings = FALSE)
#' dir <- paste(getwd(), "/examples/iris", sep = "")
#' qqplot_by_factor(iris, "Species", dir)
#' }
qqplot_by_factor <- function(input_df, factor, out_path = "none"){
  levels <- levels(input_df[[factor]])
  numeric_df <- dplyr::select_if(input_df, is.numeric)

  func <- function(){
    graphics::par(mfrow=c(3,3))
    for (j in 1:ncol(numeric_df)){
      col_name <- colnames(numeric_df)[j]
      new_df <- stats::na.omit(numeric_df)[,j]
      stats::qqnorm(numeric_df[,j][input_df[[factor]] == levels[i]],
             main = paste(col_name, levels[i], sep = " "),
             cex.main = 0.8)
      stats::qqline(numeric_df[,j][input_df[[factor]] == levels[i]])
    }
  }

  if(out_path == "none"){
    for (i in 1:length(levels)){
      func()
    }
  }
  else{

    for (i in 1:length(levels)){
      print(paste("Saving to ", out_path, "_qqplot_", levels[i], ".pdf", sep = ""))
      grDevices::pdf(paste(out_path, "_qqplot_", levels[i], ".pdf", sep = ""))
      func()
      grDevices::dev.off()
    }
  }
}

#' Print one histogram for each variable group.
#'
#' @description `histogram_by_factor` prints a histogram with density line for each group and variable of a data frame
#' @details Generate a new grid/pdf dochument for each group of a data frame. Generate one plot for each variable of this subset data frame.
#' @param input_df a data frame with at least one factor column
#' @param factor a string with the column name to group by
#' @param out_path optional string.
#' @export
#' @examples
#' histogram_by_factor(iris, "Species")
#' \dontrun{
#' dir.create(paste(getwd(), "/examples", sep = ""), showWarnings = FALSE)
#' dir <- paste(getwd(), "/examples/iris", sep = "")
#' histogram_by_factor(iris, "Species", dir)
#' }
histogram_by_factor <- function(input_df, factor, out_path = "none"){
  levels <- levels(input_df[[factor]])
  numeric_df <- dplyr::select_if(input_df, is.numeric)

  func <- function(){
    graphics::par(mfrow=c(3,3))
    for (j in 1:ncol(numeric_df)){
      col_name <- colnames(numeric_df)[j]
      graphics::hist(numeric_df[,j][input_df[[factor]] == levels[i]],
           main = paste(col_name, levels[i], sep = " "),
           cex.main = 0.8,
           xlab = NULL)
      graphics::lines(stats::density(numeric_df[,j][input_df[[factor]] == levels[i]]))
      graphics::lines(stats::density(numeric_df[,j][input_df[[factor]] == levels[i]],
                    adjust = 1.5), lty = 2)
    }
  }

  if(out_path == "none"){
    for (i in 1:length(levels)){
      func()
    }
  }
  else{
    for (i in 1:length(levels)){
      print(paste("Saving to ", out_path, "_hist_", levels[i], ".pdf", sep = ""))
      grDevices::pdf(paste(out_path, "_hist_", levels[i], ".pdf", sep = ""))
      func()
      grDevices::dev.off()
    }

  }
}

#' Print boxplot by factor
#'
#' @description `boxplot_by_factor` generates plot with one boxplot per group for each variable
#' @details A plot with one boxplot per group is generated for all variables of a data frame.
#' @param input_df a data frame with at least one factor column
#' @param factor a string with the column name to group by
#' @param out_path optional string.
#' @export
#' @examples
#' boxplot_by_factor(iris, "Species")
#' \dontrun{
#' dir.create(paste(getwd(), "/examples", sep = ""), showWarnings = FALSE)
#' dir <- paste(getwd(), "/examples/iris", sep = "")
#' boxplot_by_factor(iris, "Species", dir)
#' }
#'
boxplot_by_factor <- function(input_df, factor, out_path = "none"){
  numeric_df <- dplyr::select_if(input_df, is.numeric)

  func <- function(){
    for (i in 1:ncol(numeric_df)){
      col_name <- colnames(numeric_df)[i]
      graphics::boxplot(numeric_df[,i] ~ input_df[[factor]],
              main = col_name,
              xlab = NULL,
              ylab = NULL)
    }
  }

  if(out_path == "none"){
    graphics::par(mfrow=c(3,3),
        cex.main = 1,
        cex.axis = 0.7)
    func()
  }
  else{
    print(paste("Saving to ", out_path, "_boxplot.pdf", sep = ""))
    grDevices::pdf(paste(out_path, "_boxplot.pdf", sep = ""))
    graphics::par(mfrow=c(3,3),
                  cex.main = 1,
                  cex.axis = 0.7)
    func()
    grDevices::dev.off()
  }
}


#' Assess normality for each group
#'
#' @description `shapiro_by_factor` takes a data frame, applies the shapiro-wilk test to every group and variable and returns a table of p-values.
#' @param input_df a data frame with at least one factor column
#' @param factor a string with the column name to group by
#' @export
#' @example
#' shapiro_by_factor(iris, iris$Species)
shapiro_by_factor <- function(input_df, factor){

  shapiro_pvalue <- stats::aggregate(dplyr::select_if(input_df, is.numeric),
                              by = list(factor),
                              FUN = function(x) {y <- stats::shapiro.test(x); c(y$p.value)})


  print("p < 0.05 ... no normal distribution; p > 0.05 ... normal distribution")
  shapiro_pvalue
}


#' Correlation plots
#'
#' @description `correlation_plot` calculates correlations of variables and displays them in a dotplot
#' @details A grid of dotplots displaying the correlations between the variables of a data frame is generated. Use with less than 10 variables ideally.
#' @param input_df data frame.
#' @param method string. Method for calculating the correlation. Options: "pearson", "kendall", "spearman" (default).
#' @param out_path optional string. Path to save correlation plot to png. If out_path is empty the correlation plot is printed to the device.
#' @export
#' @examples
#' correlation_plot(iris)
#' \dontrun{
#' dir.create(paste(getwd(), "/examples", sep = ""), showWarnings = FALSE)
#' dir <- paste(getwd(), "/examples/iris", sep = "")
#' correlation_plot(iris, out_path = dir)
#' }
correlation_plot <- function(input_df, method = "spearman", out_path = "none"){
  out_name <- paste(out_path, "_correlations", ".png", sep = "")

  func <- function(){
    graphics::par(cex.main = 1, cex.axis = 0.8, family = "AvantGarde", font = 1)
    colour = viridis::viridis(n = 2)
    psych::pairs.panels(dplyr::select_if(input_df, is.numeric),
                        method = method,
                        hist.col = colour[2], rug = FALSE,
                        density = TRUE,  lm = TRUE, ci = FALSE, col = colour[1],
                        ellipses = FALSE,
                        pch = 20, cex = 0.7,
                        cex.cor = 0.5, main = "Correlation plot")
  }

  if(out_path != "none"){
    print(paste("Saving to ", out_name, sep = ""))
    grDevices::png(out_name)
    func()
    grDevices::dev.off()
    func()
  }
  else{
    func()
  }
}

#' Matrix heatmap
#'
#' @description `matrix_heatmap` displays values of a matrix in a heatmap
#' @param input_df data frame or matrix. For exaple correlation or matrix of ratios or differences.
#' @param interactive logical. Print heatmap to device (FALSE, default) or open interactive heatmap in browser (TRUE).
#' @param title string. Main title of the plot. Default: no title
#' @param out_path string. Path to save heatmap to png. If out_path is empty the heatmap is printed to the device.
#' @export
#' @examples
#' iris_cor <- cor(iris[,-5], method = "spearman")
#' matrix_heatmap(iris_cor)
#' \dontrun{
#' dir.create(paste(getwd(), "/examples", sep = ""), showWarnings = FALSE)
#' dir <- paste(getwd(), "/examples/iris", sep = "")
#' matrix_heatmap(iris_cor, interactive = TRUE)
#' matrix_heatmap(iris_cor, out_path = dir)
#' }
matrix_heatmap <- function(input_df,
                           title = "",
                           interactive = FALSE,
                           out_path = "none"){
  melted_matrix <- reshape::melt(input_df)
  names(melted_matrix) <- c("x", "y", "value")
  utils::head(melted_matrix)

  matrix_heatmap <- ggplot2::ggplot(data = melted_matrix,
                                    ggplot2::aes(x=melted_matrix$x,
                                                 y=melted_matrix$y,
                                                 fill=melted_matrix$value)) +
    ggplot2::geom_tile() +
    ggplot2::ggtitle(title) +
    ggplot2::scale_fill_viridis_c(direction = -1) +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5, family="AvantGarde"),
                   axis.text.x = ggplot2::element_text(angle = 90, size = 6, hjust = 1, colour = "grey40", family="AvantGarde"),
                   axis.text.y = ggplot2::element_text(size = 6, colour = "grey40", family="AvantGarde"),
                   axis.title.x = ggplot2::element_blank(),
                   axis.title.y = ggplot2::element_blank()
    )

  if(out_path != "none"){
    print(paste("Saving plot to ", out_path, "_matrix_heatmap.png", sep = ""))
    ggplot2::ggsave(paste(out_path, "_matrix_heatmap.png", sep = ""),
           plot = matrix_heatmap)
  }

  if (interactive == TRUE) {
    interactive_heatmap <- plotly::ggplotly(matrix_heatmap) # interactive heatmap
    interactive_heatmap
  } else {
    matrix_heatmap # static heatmap
  }
}

#' Calculate ratio matrix
#'
#' @description `calculate_ratio_matrix` takes a vector and calculates the ratio between all elements of a vector.
#' @param input_vector numeric vector.
#' @param names_vector vector. Length = length(input_vector). Row and column names for the returnded matrix. Default are numbers 1:length(input_vector).
#' @export
#' @examples
#' myvector <- c(1712.9583, 1446.4583, 1968.4167, 2124.1250, 2315.7083, 1135.1667, 2227.5000)
#' mynames <- letters[1:length(myvector)]
#' calculate_ratio_matrix(myvector, mynames)
calculate_ratio_matrix <- function(input_vector,
                                   names_vector = 1:length(input_vector)){

  ratio_matrix <- matrix(nrow = length(input_vector),
                         ncol = length(input_vector))

  for(i in 1:length(input_vector)){
    for(j in 1:length(input_vector)){
      buffer <- input_vector[i] / input_vector[j]
      ratio_matrix[i, j] <- buffer
    }
  }

  rownames(ratio_matrix) <- names_vector
  colnames(ratio_matrix) <- names_vector

  ratio_matrix
}


#' Parallel coordinates plot
#'
#' @description `parallel_plot` prints a paralell coordinates plot using a data frame
#' @details This function takes a data frame with at least one factor variable and displays it in a pralell coordinates plot, where the different groups are color coded. This plot works best for <= 10 parameters.
#' @param input_df data frame.
#' @param group_vector vector. Vector (part of the data frame), to sort by.
#' @param out_path string. Path to save parallel plot to png. If out_path is empty the parallel plot is printed to the device.
#' @param title string. Main Title. Default = "Parallel plot"
#' @param x_title string. Title of x-axis. Default: none
#' @param y_title string. Title of y-axis. Default: none
#' @param legend_title string. Title of legend. Default: none
#' @param col_names vector. Column names of the data frame and lables on the x-axis. Default: colnames(input_df)
#' @param scale string. Method used to scale the variable. Default = "globalminmax". Options: "std", "robust", "uniminmax", "globalminmax", "center", "centerObs". For more information on the options see help(ggparcoord).
#' @export
#' @examples
#' parallel_plot(iris, iris$Species)
#' \dontrun{
#' dir.create(paste(getwd(), "/examples", sep = ""), showWarnings = FALSE)
#' dir <- paste(getwd(), "/examples/iris", sep = "")
#' parallel_plot(iris, 5, out_path = dir)}
parallel_plot <- function(input_df,
                          group_vector,
                          out_path = "none",
                          title = "Parallel Plot",
                          x_title = "",
                          y_title ="",
                          legend_title = "",
                          col_names = colnames(dplyr::select_if(input_df, is.numeric)),
                          scale = "globalminmax"){

  new_df <- dplyr::select_if(input_df, is.numeric)
  new_df <- cbind(new_df, group_vector)

  par_plot <- GGally::ggparcoord(new_df,
                         columns = 1:(ncol(new_df)-1),
                         groupColumn = ncol(new_df),
                         showPoints = FALSE,
                         scale = scale, # "globalminmax" is default
                         alphaLines = 0.5
  ) +
    ggplot2::ggtitle(title) +
    ggplot2::xlab(x_title) +
    ggplot2::ylab(y_title) +
    ggplot2::labs(color = legend_title) +
    viridis::scale_color_viridis(discrete=TRUE, end = 1) +
    ggplot2::geom_point(shape = 20, size = 0.5) +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5, family="AvantGarde"),
          axis.text.x = ggplot2::element_text(angle = 45, size = 7, hjust = 1, colour = "grey40", family="AvantGarde"),
          axis.text.y = ggplot2::element_text(size = 7, colour = "grey40", family="AvantGarde"),
          axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
          axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
          legend.text = ggplot2::element_text(size = 7, colour = "grey40", family="AvantGarde"),
          legend.title = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
          legend.position = "right") +
    ggplot2::theme_minimal()


  if(out_path != "none"){
    print(paste("Saving plot to ", out_path, "_parcoord.png", sep = ""))
    ggplot2::ggsave(paste(out_path, "_parcoord.png", sep = ""),
           plot = par_plot)
  }
  else{
    par_plot
  }
}


#' Spider Chart
#'
#' @description `spider chart` takes a minimized data frame (one value per group) and prints a spider chart
#' @details {This function takes a data frame of with one value per group (i.e. calculate mean groupwise).
#' This minimized data frame is used to draw a spider chart (also radar chart odr network plot).
#' The ideal numer of parameters for a spider chart is <= 10. Also the shape of the graph depends on
#' the order of parameters. If the data has large differences in size, normalizing or scaling the data is necessary.
#' Doesn't work with non normal data.}
#' @param minimized_df data frame. A data frame that contains only one value per group and variable. Most often this will be a data frame of means calculated from another data frame.
#' @param title string. Main title of the chart. Default = "Spider chart"
#' @param legend_lab vector. Lables. Default: rownames(minimized_df)
#' @param out_path string. Path to save spider chart to png. If out_path is empty, the spider chart is printed to the device.
#' @export
#' @examples
#' minimized_iris <- aggregate(dplyr::select_if(iris, is.numeric),
#'                             by = list(iris$Species),
#'                             FUN = mean)
#' rownames(minimized_iris) <- minimized_iris$Group.1
#' spider_chart(minimized_iris, title = "Spider chart of iris", legend_lab = c(1, 2, 3))
spider_chart <- function(minimized_df,
                         title = "Spider chart",
                         legend_lab = rownames(minimized_df),
                         out_path = "none"){

  out_name <- paste(out_path, "_spiderChart", ".png", sep = "")

  func <- function(){
    graphics::par(mfrow = c(1, 1))
    spider_data <- dplyr::select_if(minimized_df, is.numeric) # remove column with rownames
    spider_labels <- colnames(spider_data) # set max. label length to 10 characters

    spider_min <- floor(min(spider_data))
    spider_max <- ceiling(max(spider_data))
    spider_interval <- (spider_max-spider_min)/4
    spider_data <- as.data.frame(dplyr::select_if(spider_data, is.numeric))


    spider_data <- rbind(spider_min, spider_max, spider_data) # add max and min to the dataframe to plot the grid


    colors_border = as.vector(viridis::viridis(n = nrow(minimized_df), option = "viridis")) ## set colors
    colors_in = scales::alpha(colors_border, alpha = 0.1)


    graphics::par(mfrow = c(1, 1)) # radar chart
    fmsb::radarchart(spider_data,
               axistype=1,
               caxislabels = c(spider_min,
                               spider_min+spider_interval,
                               spider_min+2*spider_interval,
                               spider_min+3*spider_interval,
                               spider_min+4*spider_interval),

               pcol=colors_border, #custom polygon
               pfcol=colors_in,
               pty = 20,
               plwd=2,
               plty=1,

               cglcol="grey", # custom grid
               cglty=1,
               axislabcol="grey",
               cglwd=0.8,
               # custom labels
               vlcex=0.6,
               vlabels = spider_labels,
               centerzero = FALSE)

    graphics::title(main = title, cex.main = 0.9, font.main = 1)


    graphics::legend(x=-1.4, # Add a legend
           y=1.1,
           legend = legend_lab,
           bty = "n",
           pch=20,
           col=colors_border,
           text.col = "black",
           cex=0.7, pt.cex=1.3)
  }


  if(out_path != "none"){ # print to device or save
    print(paste("Saving to ", out_name))
    grDevices::png(filename = out_name)
    func()
    grDevices::dev.off()
  }
  else{
    func()
  }
}

#' Simple barchart
#'
#' `simple_barchart` returns a ggplot barchart with viridis coloring
#' @param data_frame data frame with at least one numeric and one factor column
#' @param x vector. Factor column
#' @param y vector. Numeric column
#' @param fill vector. Factor column to fill bars by. By default all bars are blue.
#' @param title string. Main title of the plot. Default = ""
#' @param xlab string. Title of x-axis. Default = ""
#' @param ylab string. Title of y-axis. Default = ""
#' @example
#' iris_table <- reshape::melt(table(iris$Species))
#' simple_barchart(iris_table, x = iris_table$Var.1, y = iris_table$value, fill = "#35608DFF")
simple_barchart <- function(data_frame, x, y, fill = "#35608DFF",
                            title = "test", xlab = "", ylab = ""){
  simple_barchart <- ggplot2::ggplot(data_frame, ggplot2::aes(x=x, y=y, fill=fill)) +
    ggplot2::geom_bar(stat="identity") +
    ggplot2::labs(title = title,
         x = xlab,
         y = ylab) +
    viridis::scale_fill_viridis(discrete = TRUE) +
    ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5, family="AvantGarde"),
                   axis.text.x = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
                   axis.text.y = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
                   axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
                   axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
                   legend.position = "none") +
    ggplot2::theme_minimal()


  if(is.character(fill)){
    simple_barchart <- simple_barchart + ggplot2::geom_bar(stat="identity", fill = fill)
  }
  simple_barchart
}

#' Plot ratio barcharts
#' @description `plot_ratio_barcharts` plots the ratios of a list of values from a data frame by group.
#' @param data_frame data frame. From aggregate() function.
#' @param subset_vector vector of column names from the data_frame.
#' @param group_column string. Name of the column to group by.
#' @example
#' aggregated_iris <- aggregate(iris[-5], by = list(iris$Species), FUN = mean)
#' plot_ratio_barcharts(aggregated_iris, c("Sepal.Length", "Sepal.Width", "Petal.Length"), group_column = "Group.1")
plot_ratio_barcharts <- function(data_frame,
                                 subset_vector,
                                 group_column){
  mysubset <- subset(data_frame, select = subset_vector)
  myratios <- lapply(1:nrow(mysubset),
                     function(i) calculate_ratio_matrix(as.numeric(mysubset[i, ]),
                                                        names_vector = colnames(mysubset)))
  names(myratios) <- as.character(data_frame[[group_column]])

  myratios <- reshape::melt(myratios)
  myratios$ratio_name <- as.factor(paste(myratios$X1, "/", myratios$X2))
  myratios <- myratios[-(1:2)]

  my_ratio_list <- lapply(1:length(levels(myratios$ratio_name)),
                          function(i) subset(myratios, myratios$ratio_name == levels(myratios$ratio_name)[i]))
  names(my_ratio_list) <- levels(myratios$ratio_name)

  ratio_barcharts <- lapply(1:length(my_ratio_list),
                            function(i) simple_barchart(my_ratio_list[[i]],
                                                        x = my_ratio_list[[i]]$L1,
                                                        y = my_ratio_list[[i]]$value,
                                                        fill = as.factor(my_ratio_list[[i]]$L1),
                                                        title = my_ratio_list[[i]]$ratio_name,
                                                        xlab = "group",
                                                        ylab = "ratio"))
  ratio_barcharts
}
lisaschneider0509/lipidomeComparisonR documentation built on Aug. 12, 2020, 12:52 a.m.