R/FC.pval.R

Defines functions make_FC.pval_plot make_FC.pval_df_helper make_FC.pval_df

Documented in make_FC.pval_df make_FC.pval_df_helper make_FC.pval_plot

#' Find p-values and fold-change (FC) for each group
#'
#' Find p-values and fold-change (FC) for each group specified in rowAnn_col across many columns of a data frame
#'
#' @param df A data frame - first column: rowAnnotation column with groups, the rest of the columns are values
#' @param rowAnn_col A column index (numeric) or name in df indicating which groups to stratify by
#' @param rev Logical indicating whether to reverse the order of the default FC (default is TRUE), ex. low-hig --> hig-low
#' @param group_name_sep A character indicating how the labels for groups should be presented, "/" in low/high
#' @param vs.other logical, indicating whether to compare all groups to other, eg. high vs (low+intermed)
#' @param FC.method Fold change computation method to use, either "divide" (for non-transformed values) or "subtract" (for log2-transformed values)
#' @param p.method Significance test to use, either "t.test" or "wilcox"/"wilcox.test"
#' @return A data frame with p-vals and fold-changes for each group (rowAnn) in each variable (column)
#' @export
make_FC.pval_df <- function(df, rowAnn_col = 1, rev = T, vs.other = T, group_name_sep = "/", FC.method = "divide", p.method = "wilcox.test") {
  tryCatch(expr = {
    if (is.numeric(rowAnn_col)) {
      rowAnn_col <- colnames(df)[rowAnn_col]
    }

    # Get all variables stored in column names
    all_vars <- colnames(df)[!colnames(df) %in% rowAnn_col]

    l2 <- lapply(all_vars, function(v) {
      # Make a new data frame with row annotations and variable
      df3 <- df[, c(rowAnn_col, v)]

      # Remove variables that don't have enough values for a certain group by returning NULL
      df3 <- df3[!is.na(df3[, v]), ]
      counts <- table(df3[, rowAnn_col]) %>% data.frame()

      if (any(counts$Freq < 4)) {
        return(NULL)
      }

      # Get p-values and fold-change for each group
      df4 <- make_FC.pval_df_helper(df3, rowAnn_col = rowAnn_col, val_col = v, rev = rev, group_name_sep = group_name_sep, FC.method = FC.method, p.method = p.method)

      # Make "other" comparisons
      if (vs.other) {
        # Groups within annotation column e.g. low, high
        groups <- df[, rowAnn_col] %>%
          unique() %>%
          as.character()
        # Annotations in a vector
        x <- df3[, rowAnn_col] %>% as.character()

        # For each group, find p-values/FC, i.e. high vs other, low vs other
        for (group in groups) {
          y <- x
          y[x != group] <- "other"
          df3[, rowAnn_col] <- y
          df4 <- rbind(df4, make_FC.pval_df_helper(df3, rowAnn_col = rowAnn_col, val_col = v, rev = rev, group_name_sep = group_name_sep, FC.method = FC.method, p.method = p.method))
        }
      }
      return(df4)
    })
    names(l2) <- all_vars

    # Remove NULL list elements
    l2[sapply(l2, is.null)] <- NULL

    # List to data frame
    d7 <- do.call(rbind.data.frame, l2)
    d7$Var <- get_nth_part(rownames(d7), "\\.", 1)

    return(d7)
  }) # end try catch
}

#' Find p-values and fold-change (FC) for each group
#'
#' @param p_df3 A data frame with 2 columns: 1) row annotations (rowAnn), 2) numeric values
#' @param rowAnn_col Column in df3 with the groups
#' @param val_col Column in df3 with the numeric values
#' @param rev Logical indicating whether to reverse the order of the default FC (default is TRUE), ex. low-hig --> hig-low
#' @param group_name_sep A character indicating how the labels for groups should be presented, "/" in low/high
#' @param FC.method Fold change computation method to use, either "divide" (for non-transformed values) or "subtract" (for log2-transformed values)
#' @param p.method Significance test to use, either "t.test" or "wilcox"/wilcox.test"
#' @return A data frame with p-values and fold-changes
#' @section Output data frame:
#'          group    p.value Fold.change
#' 1 intermed/high 0.03457640   0.2216307
#' 2      low/high 0.02586801   0.5496807
#' 3  low/intermed 0.65911963   2.4801647
#' @export
make_FC.pval_df_helper <- function(p_df3, rowAnn_col = 1, val_col = 2, rev = F, group_name_sep = "/", FC.method = "divide", p.method = "wilcox.test") {
  # Get p values
  if (isTRUE(grepl("wilcox", p.method))) {
    p_df4 <- perform_wilcox(p_df3[, val_col], p_df3[, rowAnn_col]) %>%
      melt
    p_df4 <- p_df4[!is.na(p_df4$value), ]
  } else {
    p_df4 <- perform_t.test(p_df3[, val_col], p_df3[, rowAnn_col]) %>%
      melt
    p_df4 <- p_df4[!is.na(p_df4$value), ]
  }

  # If there are no values return
  if (nrow(p_df4) == 0) {
    return()
  }
  # Reverse order of factors
  if (rev) {
    p_df4[, c("Var1", "Var2")] <- p_df4[, c("Var2", "Var1")]
  }

  # Get median of each group
  df5 <- medians_of_groups(p_df3[, val_col], p_df3[, rowAnn_col])

  # Rename rows
  rownames(df5) <- df5[, 1]
  # > df5
  #              Var        value
  # high         high 1.942304e-05
  # intermed intermed 1.264453e-05
  # low           low 1.343557e-05

  # Calculate quotient (FC) of medians depending on permutations of groups specified in p_df4
  l <- lapply(1:nrow(p_df4), function(i) {
    if (FC.method == "divide") {
      df5[as.character(p_df4$Var1[i]), 2] / df5[as.character(p_df4$Var2[i]), 2]
    } else {
      df5[as.character(p_df4$Var1[i]), 2] - df5[as.character(p_df4$Var2[i]), 2]
    }
  })

  # Return data frame of combining FC and pvalue info for this iteration
  data.frame(
    group = factor(paste(p_df4$Var1, p_df4$Var2, sep = group_name_sep)), # paste(substring(p_df4$Var1,1,3), substring(p_df4$Var2,1,3), sep="-"),
    p.value = p_df4$value,
    Fold.change = do.call(rbind.data.frame, l) %>% unlist() %>% unname()
  )
}


#' Plot FC-p value heatmap
#'
#' Plot grid of FC values as fill (with color scale) and p-value as stars/numbers in the centre of each tile.
#'
#' @param df A data frame with these exact columns 1) group (ie. group comparisons on x-axis), 2) p.value, 3) Fold.change, 4) Var (y-axis), created using \code{\link{make_FC.pval_df}}.
#' @section Example of input data frame:
#' group   p.value       Fold.change      Var
#' neo/non 0.6747883 1.5617445    CD11B
#' neo/non 0.4820339 1.7443427      CD3
#' neo/non 0.9082967 1.0589647      CD4
#' neo/non 0.9691466 0.9896168     CD45
#' @param xlab X axis label.
#' @param ylab Y axis label.
#' @param plot_title Title of plot.
#' @param out_dir The output directory where the plot will be saved, default is current working directory.
#' @param pval.label How p-values are represented. Allowed values are "p.signif" (stars) and "p.format" (number).
#' @param gradient_palette RColorBrewer palette for fold-change values. See RColorBrewer::display.brewer.all() for all options.
#' @param group_name_sep A character indicating how the labels for groups should be presented, "/" in low/high
#' @param trim_x Number, indicating the number of characters for each part (that is, length of truncated output string).
#' @param pval_size Size of p-values.
#' @param pval_color Color of p-values.
#' @param log2FC Logical (TRUE/FALSE). Should log2 transformation be applied to Fold.change column before plotting?
#' @param scale_FC Either "scale_column", "scale_row", "none", or "cap_outliers" (default). Should scale be applied to FC for each group (column), Var (row)? In "cap_outliers", non-outliers are unscaled, but upper and lower outliers (points outside 1st/3rd quartiles respectively) become upper and lower values of range of non-outliers.
#' @param rescale_to A numeric vector of length 2, indicating lower and upper limits of scale. Default is 0 to 1: c(0,1). Only applied if "scale_FC" parameter is not "none".
#' @param x_axis_angle Angle of the x-axis label. Default is 0 (horizontal), 90 is vertical.
#' @param save.to.file If TRUE, save plot to file in out_dir. If FALSE, print to panel.
#' @param font_size The size of text labels plot. legend title. The size of plot title, axis text, legend text is font_size. The size of plot subtitle is font_size / 1.5.
#' @param line_size The thickness of grid lines.
#' @param alphabetical_row Logical; should the y axis be sorted alphabetically or preserve the order of df$Var?
#' @return Plot object if save.to.file is FALSE.
#' @export
make_FC.pval_plot <- function(df, x_lab = "", y_lab = "", plot_title = "", out_dir = ".", pval.label = "p.signif", gradient_palette = "RdBu",
                              group_name_sep = "/", trim_x = 3, pval_size = 8, pval_color = "white", log2FC = F, scale_FC = "cap_outliers", rescale_to = c(0, 1),
                              x_axis_angle = 0, save.to.file = F, font_size = 10, line_size = 1, alphabetical_row = F) {
  # # Error checking
  #   errorCondition(message = "Ensure scale_FC parameter in make_FC.pval_plot has value of: 'scale_column', 'scale_row', 'none', or 'cap_outliers'")
  #   return()
  # }

  # Apply log transformation
  if (log2FC) {
    df$Fold.change <- log2(df$Fold.change)
  }

  # Apply scale to each group/Variable
  if (scale_FC == "scale_column" | scale_FC == "scale_row") {
    scale_by <- ifelse(scale_FC == "scale_row", "Var", "group")
    # Apply scale
    df$Fold.change <- ave(as.numeric(df$Fold.change), df[, scale_by], FUN = function(x) {
      scales::rescale(x, to = rescale_to)
    })
  }

  # Apply limit to outliers, upper outliers become upper limit of scale and lower becomes lower limit
  if (scale_FC == "cap_outliers") {
    # Find outliers (logical)
    outliers <- get_outliers(df$Fold.change)
    # Rescale non-outliers
    unchanged <- !outliers %in% c("upper", "lower")
    # df$Fold.change[unchanged] <- scales::rescale(df$Fold.change[unchanged], to = rescale_to)
    # # Make outliers limit
    df$Fold.change[outliers == "lower"] <- df$Fold.change[unchanged] %>% min(na.rm = T) # rescale_to[1]
    df$Fold.change[outliers == "upper"] <- df$Fold.change[unchanged] %>% max(na.rm = T) # rescale_to[2]
  }

  # Add stars
  if (pval.label == "p.signif") {
    df$p_stars <- pval_to_stars(df$p.value)
  } else {
    df$p.value <- round(df$p.value, 3)
  }

  # Trim x axis # alt to scale_x_discrete(labels = function(x) strtrim(x, 3))
  if (!isFALSE(trim_x)) {
    n_orig <- df$group %>%
      unique() %>%
      length()
    x <- trim_each_part(df$group, trim_x = trim_x, split = group_name_sep)
    n_new <- x %>%
      unique() %>%
      length()
    if (n_orig != n_new) {
      x_axis_angle <- 45
    } else {
      df$group <- x
    }
  }

  # Manually order high/low/oth comparisons
  e <- unique(df$group)
  e <- e[!is.na(e)]
  lvl_order <- c("hig/low", "hig/int", "int/low", "hig/oth", "int/oth", "low/oth")

  if (all(e %in% lvl_order)) {
    # If all the elements are in the vector above, order x axis according to level order
    df$group <- factor(df$group, levels = lvl_order[lvl_order %in% e])
  } else {
    # Move "/other" to right of heatmap
    other <- ifelse(isFALSE(trim_x), "other", strtrim("other", trim_x)) %>% grepl(e)
    df$group <- factor(df$group, levels = c(as.character(e[!other]), as.character(e[other])))
  }

  # Color gradient for heatmap
  pal_grad <- get_col_palette(gradient_palette, rev = T) %>% get_col_gradient(100)

  # Should rows not be sorted? Then factor current order to keep levels
  df$Var <- as.character(df$Var)
  if (isFALSE(alphabetical_row)) {
    df$Var <- factor(df$Var, levels = unique(df$Var))
  }

  # Plot
  p <- ggplot(df, aes(x = group, y = Var, fill = Fold.change)) +
    geom_tile() +
    ggtitle(paste(plot_title)) +
    labs(
      title = plot_title,
      caption = ifelse(!isFALSE(pval.label), "p <= 0.001 '****', 0.001 '***', 0.01 '**', 0.05 '*'", ""),
      subtitle = out_dir,
      x = x_lab,
      y = y_lab
    ) +
    scale_fill_gradientn(colors = pal_grad, name = paste0(ifelse(log2FC, "log2 ", ""), "FC ", ifelse(scale_FC == "none", "", scale_FC))) +
    scale_x_discrete(expand = c(0, 0)) + # remove space between grid and axes
    scale_y_discrete(expand = c(0, 0)) +
    theme(
      panel.background = element_blank(), # remove background color and lines
      plot.title = element_text(colour = "black", size = font_size),
      plot.subtitle = element_text(colour = "black", size = font_size / 1.5),
      axis.line = element_line(colour = "black", size = line_size), # increase the axis-line thickness and change the color to blac
      # Ticks
      axis.ticks = element_line(colour = "black", size = line_size), # increase the tick thickness)
      axis.ticks.length = unit(.25, "cm"),
      # Axes labels
      axis.text = element_text(colour = "black", size = font_size), # face = "bold"),
      axis.text.x = element_text(margin = margin(t = 7, r = 0, b = 0, l = 0), angle = x_axis_angle, vjust = 0.5), # increase space between x axis title and labels
      axis.text.y = element_text(margin = margin(t = 0, r = 7, b = 0, l = 0)),
      # axes tick labels
      axis.title = element_text(colour = "black", size = font_size, face = "bold"), # axes title labels
      # legend
      legend.text = element_text(colour = "black", size = font_size, face = "bold"),
      legend.title = element_text(colour = "black", size = font_size, face = "bold"),
      # legend.position = "bottom", legend.box = "vertical"
    )

  # Add stars if applicable
  if (!isFALSE(pval.label)) {
    if (pval.label == "p.signif") {
      p <- p +
        geom_text(aes(label = p_stars), size = pval_size, color = pval_color, vjust = 0.8)
    }
    if (pval.label == "p.format") {
      p <- p +
        geom_text(aes(label = p.value), size = pval_size, color = pval_color, vjust = 0.5)
    }
  }

  # Save to file
  if (save.to.file) {
    # Graphing params
    file_h <- (length(unique(df$Var)) + 4) / 4 + 2 # file height
    ggsave(device = "pdf", height = file_h, limitsize = F, filename = sprintf("%s/%s_pval_FC.pdf", out_dir, plot_title), plot = p) # , height = nrow(df)*0.6, width = 4)
  } else {
    print(p)
  }
}
kazeera/hourglass documentation built on April 5, 2025, 7:18 a.m.