R/plot_exprs.R

Defines functions plot_exprs

Documented in plot_exprs

#' @title Diagnostic plots showing individual marker distribution before and after mapping by HDStIM
#'
#' @param mapped_data    List output of the \code{\link{HDStIM}} function.
#' @param path           Path to the folder to save figures generated by this function.
#' @param verbose        Logical. To make function more verbose. Default is FALSE.
#'
#' @importFrom tidyr gather
#' @importFrom dplyr select filter
#' @import ggplot2
#' @importFrom ggridges geom_density_ridges
#' @return               A list of ggplot objects. If the path is not NULL, PNG files of the plots are saved in the specified folder.
#' @export
#' @examples
#' mapped_data <- HDStIM(chi11$expr_data, chi11$state_markers,
#'                   chi11$cluster_col, chi11$stim_label,
#'                   chi11$unstim_label, seed_val = 123, umap = FALSE, umap_cells = NULL,
#'                   verbose = FALSE)
#' 
#' pe <- plot_exprs(mapped_data, path = NULL, verbose = FALSE)
plot_exprs <- function(mapped_data, path = NULL, verbose = FALSE){
  # Check if path exists; if not then create it.
  if(!is.null(path)){
    if(!dir.exists(path)){
      if(verbose){message(paste("Creating %s folder", path))}
      dir.create(path, recursive = TRUE)
    } else {
      if(verbose){message(paste(path, "folder already exists. Output will be over written."))}
    }
  }

  # Bind global variables.
  cell_population <- stim_type <- distribution <- marker_exp <- k_cluster_id <- responding_cluster <- p.value <- NULL

  # Define colors.
  cbPalette <- c("#E69F00", "#009E73", "#56B4E9", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

  state_markers <- mapped_data$state_markers
  cellpop_col  <- mapped_data$cellpop_col

  # Convert the pre k-means data from wide to long form for the state markers.
  original_data <- mapped_data$response_mapping_main %>%
    dplyr::select(cell_population, stim_type, all_of(state_markers))
  gather_ori_dat <- data.frame()
  for(state in state_markers){
    temp_df <- original_data %>% dplyr::select(cell_population, stim_type)
    marker_dat <- tidyr::gather(original_data[state], key = "state_marker", value = "marker_exp")
    temp_df <- cbind(temp_df, marker_dat, "distribution" = "stim")
    gather_ori_dat <- rbind(gather_ori_dat, temp_df)
  }

  # Convert the post k-means data from wide to long form for the state markers.
  selec_data <- mapped_data$response_mapping_main %>%
    dplyr::filter(k_cluster_id == responding_cluster & stim_type %in% mapped_data$stim_label) %>%
    dplyr::select(cell_population, stim_type, all_of(state_markers))
  gather_selec_dat <- data.frame()
  for(state in state_markers){
    temp_df <- selec_data %>% dplyr::select(cell_population, stim_type)
    marker_dat <- tidyr::gather(selec_data[state], key = "state_marker", value = "marker_exp")
    temp_df <- cbind(temp_df, marker_dat, "distribution" = "mapped")
    gather_selec_dat <- rbind(gather_selec_dat, temp_df)
  }

  if(length(state_markers) <= 4){
    n_col <- length(state_markers)
    n_row <- 1
  } else if(length(state_markers) > 4){
    n_col <- 4
    n_row_temp <- length(state_markers) / n_col
    if(is.integer(n_row_temp)){
      n_row <- n_row_temp
    } else {
      n_row <- as.integer(n_row_temp) + 1
    }
  }

  comb_f_sig <- mapped_data$all_fisher_p_val %>%
    dplyr::filter(p.value < 0.05)

  allplots <- list()
  for(i in 1:nrow(comb_f_sig)){
    # Plot KDE of original (pre-k-means clustering) distribution overlaid
    # by the KDE of the k-mapped distribution.
    st <- as.character(comb_f_sig[[i,1]])
    cl <- as.character(comb_f_sig[[i,2]])


    clust_stim_k <- gather_selec_dat[gather_selec_dat$cell_population == cl & gather_selec_dat$stim_type == st, ]
    clust_stim_ori <- gather_ori_dat[gather_ori_dat$cell_population == cl & gather_ori_dat$stim_type == st, ]
    clust_unstim_ori <- gather_ori_dat[gather_ori_dat$cell_population == cl & gather_ori_dat$stim_type == mapped_data$unstim_label, ]
    clust_unstim_ori$distribution <- "unstim"
    clust_stim <- as_tibble(rbind(clust_stim_k, clust_stim_ori, clust_unstim_ori))

    base_size <- if(n_col >=4){20}else if(n_col < 4){10}
    theme_set(theme_grey(base_size = base_size))
    p <- ggplot(data = clust_stim, aes(x = marker_exp , y = distribution)) +
      ggridges::geom_density_ridges(aes(fill = distribution), scale = 1) +
      scale_fill_manual(values=cbPalette) +
      labs(title = paste0("Cell Pop.: ", cl, "; Stim.: ", st),
           x = "Marker Expression",
           y = "Distribution",
           fill = "Distribution")

    kde_plot <- p + facet_wrap( ~ state_marker, nrow = n_row, ncol = n_col)

    allplots[[i]] <- kde_plot
    if(!is.null(path)){
      ggsave(paste0("exprs_", cl, "_", st, ".png"), plot = kde_plot, device = "png",
           path = path, width = n_col * 4, height = n_row * 3, dpi = 300)
    }
  }

  return(allplots)
}
niaid/HDStIM documentation built on Oct. 15, 2023, 4:43 p.m.